mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-27 11:39:46 +00:00
Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca884aa672 | ||
|
|
c7d7d95a5a | ||
|
|
dfd3023173 | ||
|
|
17546dbdda | ||
|
|
18b7303c54 | ||
|
|
eb5c7665a1 | ||
|
|
6dd092ddb7 | ||
|
|
4397463e73 | ||
|
|
e598191e8e | ||
|
|
1dc88ff75e | ||
|
|
811073c51e | ||
|
|
dac2350c7f | ||
|
|
9594cc0169 | ||
|
|
5f40a5aee5 | ||
|
|
758b4ee818 |
89
CHANGELOG.md
89
CHANGELOG.md
@@ -1,7 +1,94 @@
|
||||
# Changelog
|
||||
|
||||
## Release 535 Entries
|
||||
|
||||
### [535.43.13] 2023-10-11
|
||||
|
||||
### [535.43.11] 2023-10-05
|
||||
|
||||
### [535.43.10] 2023-09-27
|
||||
|
||||
### [535.43.09] 2023-09-01
|
||||
|
||||
### [535.43.08] 2023-08-17
|
||||
|
||||
### [535.43.02] 2023-05-30
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed console restore with traditional VGA consoles.
|
||||
|
||||
#### Added
|
||||
|
||||
- Added support for Run Time D3 (RTD3) on Ampere and later GPUs.
|
||||
- Added support for G-Sync on desktop GPUs.
|
||||
|
||||
## Release 530 Entries
|
||||
|
||||
### [530.41.03] 2023-03-23
|
||||
|
||||
### [530.30.02] 2023-02-28
|
||||
|
||||
#### Changed
|
||||
|
||||
- GSP firmware is now distributed as `gsp_tu10x.bin` and `gsp_ga10x.bin` to better reflect the GPU architectures supported by each firmware file in this release.
|
||||
- The .run installer will continue to install firmware to /lib/firmware/nvidia/<version> and the nvidia.ko kernel module will load the appropriate firmware for each GPU at runtime.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for power management features like Suspend, Hibernate and Resume.
|
||||
|
||||
## Release 525 Entries
|
||||
|
||||
### [525.116.04] 2023-05-09
|
||||
|
||||
### [525.116.03] 2023-04-25
|
||||
|
||||
### [525.105.17] 2023-03-30
|
||||
|
||||
### [525.89.02] 2023-02-08
|
||||
|
||||
### [525.85.12] 2023-01-30
|
||||
|
||||
### [525.85.05] 2023-01-19
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix build problems with Clang 15.0, [#377](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/377) by @ptr1337
|
||||
|
||||
### [525.78.01] 2023-01-05
|
||||
|
||||
### [525.60.13] 2022-12-05
|
||||
|
||||
### [525.60.11] 2022-11-28
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)
|
||||
|
||||
### [525.53] 2022-11-10
|
||||
|
||||
#### Changed
|
||||
|
||||
- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
|
||||
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
|
||||
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
|
||||
- Return EINVAL when [failing to] allocating memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
|
||||
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
|
||||
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.
|
||||
|
||||
## Release 520 Entries
|
||||
|
||||
### [520.61.07] 2022-10-20
|
||||
|
||||
### [520.56.06] 2022-10-12
|
||||
|
||||
#### Added
|
||||
@@ -29,6 +116,8 @@
|
||||
- Improved compatibility with new Linux kernel releases
|
||||
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
|
||||
|
||||
### [515.65.07] 2022-10-19
|
||||
|
||||
### [515.65.01] 2022-08-02
|
||||
|
||||
#### Fixed
|
||||
|
||||
112
README.md
112
README.md
@@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 520.56.06.
|
||||
version 535.43.13.
|
||||
|
||||
|
||||
## How to Build
|
||||
@@ -15,9 +15,9 @@ as root:
|
||||
|
||||
make modules_install -j$(nproc)
|
||||
|
||||
Note that the kernel modules built here must be used with gsp.bin
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
520.56.06 driver release. This can be achieved by installing
|
||||
535.43.13 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@@ -162,12 +162,25 @@ for the target kernel.
|
||||
- `src/nvidia/` The OS-agnostic code for nvidia.ko
|
||||
- `src/nvidia-modeset/` The OS-agnostic code for nvidia-modeset.ko
|
||||
- `src/common/` Utility code used by one or more of nvidia.ko and nvidia-modeset.ko
|
||||
- `nouveau/` Tools for integration with the Nouveau device driver
|
||||
|
||||
|
||||
## Nouveau device driver integration
|
||||
|
||||
The Python script in the 'nouveau' directory is used to extract some of the
|
||||
firmware binary images (and related data) encoded in the source code and
|
||||
store them as distinct files. These files are used by the Nouveau device
|
||||
driver to load and communicate with the GSP firmware.
|
||||
|
||||
The layout of the binary files is described in nouveau_firmware_layout.ods,
|
||||
which is an OpenDocument Spreadsheet file, compatible with most spreadsheet
|
||||
software applications.
|
||||
|
||||
|
||||
## Compatible GPUs
|
||||
|
||||
The open-gpu-kernel-modules can be used on any Turing or later GPU
|
||||
(see the table below). However, in the 520.56.06 release,
|
||||
(see the table below). However, in the 535.43.13 release,
|
||||
GeForce and Workstation support is still considered alpha-quality.
|
||||
|
||||
To enable use of the open kernel modules on GeForce and Workstation GPUs,
|
||||
@@ -175,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
|
||||
parameter to 1. For more details, see the NVIDIA GPU driver end user
|
||||
README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.13/README/kernel_open.html
|
||||
|
||||
In the below table, if three IDs are listed, the first is the PCI Device
|
||||
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
|
||||
@@ -645,13 +658,29 @@ Subsystem Device ID.
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
|
||||
| NVIDIA PG506-242 | 20B3 10DE 14A7 |
|
||||
| NVIDIA PG506-243 | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A30 | 20B7 10DE 1804 |
|
||||
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
|
||||
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
|
||||
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
|
||||
| NVIDIA A800 40GB Active | 20F6 1028 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 103C 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
@@ -710,7 +739,15 @@ Subsystem Device ID.
|
||||
| NVIDIA A10 | 2236 10DE 1482 |
|
||||
| NVIDIA A10G | 2237 10DE 152F |
|
||||
| NVIDIA A10M | 2238 10DE 1677 |
|
||||
| NVIDIA H100 NVL | 2321 10DE 1839 |
|
||||
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
|
||||
| NVIDIA H800 | 2324 10DE 17A6 |
|
||||
| NVIDIA H800 | 2324 10DE 17A8 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
@@ -738,6 +775,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
|
||||
| NVIDIA RTX A4500 Laptop GPU | 24BA |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
|
||||
| NVIDIA GeForce RTX 3060 | 24C7 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
|
||||
@@ -761,6 +799,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
|
||||
| NVIDIA GeForce RTX 3050 | 2582 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
|
||||
@@ -773,14 +812,73 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 2050 | 25A7 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A9 |
|
||||
| NVIDIA GeForce MX570 A | 25AA |
|
||||
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
| NVIDIA A2 | 25B6 10DE 157E |
|
||||
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
|
||||
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
|
||||
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BB |
|
||||
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BD |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
|
||||
| NVIDIA GeForce RTX 2050 | 25ED |
|
||||
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
|
||||
| NVIDIA RTX A2000 Embedded GPU | 25FA |
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA L40S | 26B9 10DE 1851 |
|
||||
| NVIDIA L40S | 26B9 10DE 18CF |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
|
||||
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
|
||||
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2803 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2805 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 | 2882 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
|
||||
|
||||
@@ -70,9 +70,13 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
|
||||
|
||||
EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.56.06\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.13\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
endif
|
||||
|
||||
EXTRA_CFLAGS += -Wno-unused-function
|
||||
|
||||
@@ -87,7 +91,8 @@ ifeq ($(ARCH),arm64)
|
||||
endif
|
||||
|
||||
ifeq ($(NV_BUILD_TYPE),debug)
|
||||
EXTRA_CFLAGS += -g -gsplit-dwarf
|
||||
EXTRA_CFLAGS += -g
|
||||
EXTRA_CFLAGS += $(call cc-option,-gsplit-dwarf,)
|
||||
endif
|
||||
|
||||
EXTRA_CFLAGS += -ffreestanding
|
||||
@@ -212,8 +217,10 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_auth.h \
|
||||
drm/drm_gem.h \
|
||||
drm/drm_crtc.h \
|
||||
drm/drm_color_mgmt.h \
|
||||
drm/drm_atomic.h \
|
||||
drm/drm_atomic_helper.h \
|
||||
drm/drm_atomic_state_helper.h \
|
||||
drm/drm_encoder.h \
|
||||
drm/drm_atomic_uapi.h \
|
||||
drm/drm_drv.h \
|
||||
@@ -229,6 +236,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_ioctl.h \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
@@ -236,13 +244,14 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/efi.h \
|
||||
linux/kconfig.h \
|
||||
linux/platform/tegra/mc_utils.h \
|
||||
linux/semaphore.h \
|
||||
linux/printk.h \
|
||||
linux/ratelimit.h \
|
||||
linux/prio_tree.h \
|
||||
linux/log2.h \
|
||||
linux/of.h \
|
||||
linux/bug.h \
|
||||
linux/sched.h \
|
||||
linux/sched/mm.h \
|
||||
linux/sched/signal.h \
|
||||
linux/sched/task.h \
|
||||
linux/sched/task_stack.h \
|
||||
@@ -256,6 +265,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/platform/tegra/dce/dce-client-ipc.h \
|
||||
linux/nvhost.h \
|
||||
linux/nvhost_t194.h \
|
||||
linux/host1x-next.h \
|
||||
asm/book3s/64/hash-64k.h \
|
||||
asm/set_memory.h \
|
||||
asm/prom.h \
|
||||
@@ -265,6 +275,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
asm/opal-api.h \
|
||||
sound/hdaudio.h \
|
||||
asm/pgtable_types.h \
|
||||
asm/page.h \
|
||||
linux/stringhash.h \
|
||||
linux/dma-map-ops.h \
|
||||
rdma/peer_mem.h \
|
||||
@@ -286,7 +297,13 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/ioasid.h \
|
||||
linux/stdarg.h \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
linux/mdev.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h \
|
||||
linux/cc_platform.h \
|
||||
asm/cpufeature.h
|
||||
|
||||
# Filename to store the define for the header in $(1); this is only consumed by
|
||||
# the rule below that concatenates all of these together.
|
||||
|
||||
@@ -242,7 +242,7 @@
|
||||
#endif
|
||||
|
||||
/* For verification-only features not intended to be included in normal drivers */
|
||||
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
|
||||
#if defined(ENABLE_VERIF_FEATURES)
|
||||
#define NV_VERIF_FEATURES
|
||||
#endif
|
||||
|
||||
@@ -276,12 +276,6 @@
|
||||
#define NV_IS_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_GSP_MODS)
|
||||
#define NV_IS_GSP_MODS 1
|
||||
#else
|
||||
#define NV_IS_GSP_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_WINDOWS)
|
||||
#define NVOS_IS_WINDOWS 1
|
||||
#else
|
||||
|
||||
83
kernel-open/common/inc/nv-firmware-registry.h
Normal file
83
kernel-open/common/inc/nv-firmware-registry.h
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//
|
||||
// This file holds GPU firmware related registry key definitions that are
|
||||
// shared between Windows and Unix
|
||||
//
|
||||
|
||||
#ifndef NV_FIRMWARE_REGISTRY_H
|
||||
#define NV_FIRMWARE_REGISTRY_H
|
||||
|
||||
//
|
||||
// Registry key that when enabled, will enable use of GPU firmware.
|
||||
//
|
||||
// Possible mode values:
|
||||
// 0 - Do not enable GPU firmware
|
||||
// 1 - Enable GPU firmware
|
||||
// 2 - (Default) Use the default enablement policy for GPU firmware
|
||||
//
|
||||
// Setting this to anything other than 2 will alter driver firmware-
|
||||
// enablement policies, possibly disabling GPU firmware where it would
|
||||
// have otherwise been enabled by default.
|
||||
//
|
||||
// Policy bits:
|
||||
//
|
||||
// POLICY_ALLOW_FALLBACK:
|
||||
// As the normal behavior is to fail GPU initialization if this registry
|
||||
// entry is set in such a way that results in an invalid configuration, if
|
||||
// instead the user would like the driver to automatically try to fallback
|
||||
// to initializing the failing GPU with firmware disabled, then this bit can
|
||||
// be set (ex: 0x11 means try to enable GPU firmware but fall back if needed).
|
||||
// Note that this can result in a mixed mode configuration (ex: GPU0 has
|
||||
// firmware enabled, but GPU1 does not).
|
||||
//
|
||||
#define NV_REG_STR_ENABLE_GPU_FIRMWARE "EnableGpuFirmware"
|
||||
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_MASK 0x0000000F
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DISABLED 0x00000000
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_ENABLED 0x00000001
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DEFAULT 0x00000002
|
||||
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_MASK 0x000000F0
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_ALLOW_FALLBACK 0x00000010
|
||||
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_DEFAULT_VALUE 0x00000012
|
||||
|
||||
//
|
||||
// Registry key that when enabled, will send GPU firmware logs
|
||||
// to the system log, when possible.
|
||||
//
|
||||
// Possible values:
|
||||
// 0 - Do not send GPU firmware logs to the system log
|
||||
// 1 - Enable sending of GPU firmware logs to the system log
|
||||
// 2 - (Default) Enable sending of GPU firmware logs to the system log for
|
||||
// the debug kernel driver build only
|
||||
//
|
||||
#define NV_REG_STR_ENABLE_GPU_FIRMWARE_LOGS "EnableGpuFirmwareLogs"
|
||||
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_DISABLE 0x00000000
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE 0x00000001
|
||||
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE_ON_DEBUG 0x00000002
|
||||
|
||||
#endif // NV_FIRMWARE_REGISTRY_H
|
||||
132
kernel-open/common/inc/nv-firmware.h
Normal file
132
kernel-open/common/inc/nv-firmware.h
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NV_FIRMWARE_H
|
||||
#define NV_FIRMWARE_H
|
||||
|
||||
|
||||
|
||||
#include <nvtypes.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_TYPE_GSP,
|
||||
NV_FIRMWARE_TYPE_GSP_LOG
|
||||
} nv_firmware_type_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_CHIP_FAMILY_NULL = 0,
|
||||
NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
|
||||
NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
|
||||
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
|
||||
NV_FIRMWARE_CHIP_FAMILY_END,
|
||||
} nv_firmware_chip_family_t;
|
||||
|
||||
static inline const char *nv_firmware_chip_family_to_string(
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
{
|
||||
switch (fw_chip_family) {
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
|
||||
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
|
||||
//
|
||||
// The function nv_firmware_path will then be available.
|
||||
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
static inline const char *nv_firmware_path(
|
||||
nv_firmware_type_t fw_type,
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
{
|
||||
if (fw_type == NV_FIRMWARE_TYPE_GSP)
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
|
||||
// which will then be invoked (at the top-level) for each
|
||||
// gsp_*.bin (but not gsp_log_*.bin)
|
||||
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
|
||||
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
|
||||
#endif // NV_FIRMWARE_DECLARE_GSP_FILENAME
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
|
||||
* @key: the key of the objects to iterate over
|
||||
*/
|
||||
#define nv_hash_for_each_possible(name, obj, member, key) \
|
||||
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
|
||||
#endif // __NV_HASH_H__
|
||||
|
||||
@@ -27,15 +27,13 @@
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
// Enums for supported hypervisor types.
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
|
||||
typedef enum _HYPERVISOR_TYPE
|
||||
{
|
||||
OS_HYPERVISOR_XEN = 0,
|
||||
OS_HYPERVISOR_VMWARE,
|
||||
OS_HYPERVISOR_HYPERV,
|
||||
OS_HYPERVISOR_KVM,
|
||||
OS_HYPERVISOR_PARALLELS,
|
||||
OS_HYPERVISOR_CUSTOM_FORCED,
|
||||
OS_HYPERVISOR_UNKNOWN
|
||||
} HYPERVISOR_TYPE;
|
||||
|
||||
@@ -43,6 +41,7 @@ typedef enum _HYPERVISOR_TYPE
|
||||
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
|
||||
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
|
||||
#define CMD_VGPU_VFIO_PRESENT 3
|
||||
#define CMD_VFIO_PCI_CORE_PRESENT 4
|
||||
|
||||
#define MAX_VF_COUNT_PER_GPU 64
|
||||
|
||||
@@ -59,6 +58,7 @@ typedef struct
|
||||
void *waitQueue;
|
||||
void *nv;
|
||||
NvU32 *vgpuTypeIds;
|
||||
NvU8 **vgpuNames;
|
||||
NvU32 numVgpuTypes;
|
||||
NvU32 domain;
|
||||
NvU8 bus;
|
||||
@@ -92,30 +92,6 @@ typedef enum VGPU_DEVICE_STATE_E
|
||||
NV_VGPU_DEV_IN_USE = 2
|
||||
} VGPU_DEVICE_STATE;
|
||||
|
||||
typedef enum _VMBUS_CMD_TYPE
|
||||
{
|
||||
VMBUS_CMD_TYPE_INVALID = 0,
|
||||
VMBUS_CMD_TYPE_SETUP = 1,
|
||||
VMBUS_CMD_TYPE_SENDPACKET = 2,
|
||||
VMBUS_CMD_TYPE_CLEANUP = 3,
|
||||
} VMBUS_CMD_TYPE;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 request_id;
|
||||
NvU32 page_count;
|
||||
NvU64 *pPfns;
|
||||
void *buffer;
|
||||
NvU32 bufferlen;
|
||||
} vmbus_send_packet_cmd_params;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 override_sint;
|
||||
NvU8 *nv_guid;
|
||||
} vmbus_setup_cmd_params;
|
||||
|
||||
/*
|
||||
* Function prototypes
|
||||
*/
|
||||
|
||||
@@ -62,6 +62,7 @@ typedef struct nv_ioctl_numa_info
|
||||
uint64_t memblock_size __aligned(8);
|
||||
uint64_t numa_mem_addr __aligned(8);
|
||||
uint64_t numa_mem_size __aligned(8);
|
||||
uint8_t use_auto_online;
|
||||
nv_offline_addresses_t offline_addresses __aligned(8);
|
||||
} nv_ioctl_numa_info_t;
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ typedef struct nv_ioctl_rm_api_version
|
||||
|
||||
#define NV_RM_API_VERSION_CMD_STRICT 0
|
||||
#define NV_RM_API_VERSION_CMD_RELAXED '1'
|
||||
#define NV_RM_API_VERSION_CMD_OVERRIDE '2'
|
||||
#define NV_RM_API_VERSION_CMD_QUERY '2'
|
||||
|
||||
#define NV_RM_API_VERSION_REPLY_UNRECOGNIZED 0
|
||||
#define NV_RM_API_VERSION_REPLY_RECOGNIZED 1
|
||||
|
||||
@@ -28,15 +28,10 @@
|
||||
#include <linux/list.h> // list
|
||||
#include <linux/sched.h> // task_struct
|
||||
#include <linux/numa.h> // NUMA_NO_NODE
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
#include "conftest.h"
|
||||
|
||||
#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
|
||||
#include <linux/semaphore.h>
|
||||
#else
|
||||
#include <asm/semaphore.h>
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// nv_kthread_q:
|
||||
//
|
||||
@@ -115,11 +110,6 @@ struct nv_kthread_q_item
|
||||
void *function_args;
|
||||
};
|
||||
|
||||
#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
|
||||
#else
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
|
||||
#endif
|
||||
|
||||
#ifndef NUMA_NO_NODE
|
||||
#define NUMA_NO_NODE (-1)
|
||||
@@ -142,18 +132,12 @@ struct nv_kthread_q_item
|
||||
//
|
||||
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
|
||||
//
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node if
|
||||
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
|
||||
// fallback to another node is possible because kernel allocators do not
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node,
|
||||
// but fallback to another node is possible because kernel allocators do not
|
||||
// guarantee affinity. Note that NUMA-affinity applies only to
|
||||
// the kthread stack. This API does not do anything about limiting the CPU
|
||||
// affinity of the kthread. That is left to the caller.
|
||||
//
|
||||
// On kernels, which do not support NUMA-aware kthread stack allocations
|
||||
// (NV_KTHTREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
|
||||
// if the value supplied for 'preferred_node' is anything other than
|
||||
// NV_KTHREAD_NO_NODE.
|
||||
//
|
||||
// Reusing a queue: once a queue is initialized, it must be safely shut down
|
||||
// (see "Stopping the queue(s)", below), before it can be reused. So, for
|
||||
// a simple queue use case, the following will work:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -191,13 +191,6 @@
|
||||
*/
|
||||
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
|
||||
|
||||
#if !defined(NV_KUID_T_PRESENT)
|
||||
static inline uid_t __kuid_val(uid_t uid)
|
||||
{
|
||||
return uid;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_VGA_ARB)
|
||||
#include <linux/vgaarb.h>
|
||||
#endif
|
||||
@@ -218,6 +211,7 @@ static inline uid_t __kuid_val(uid_t uid)
|
||||
#include <linux/highmem.h>
|
||||
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/memory.h>
|
||||
|
||||
#include <linux/workqueue.h> /* workqueue */
|
||||
#include "nv-kthread-q.h" /* kthread based queue */
|
||||
@@ -234,18 +228,6 @@ static inline uid_t __kuid_val(uid_t uid)
|
||||
#include <asm-generic/pci-dma-compat.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
|
||||
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
|
||||
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
|
||||
#else
|
||||
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
|
||||
#endif
|
||||
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
|
||||
#define NV_EFI_ENABLED() efi_enabled
|
||||
#else
|
||||
#define NV_EFI_ENABLED() 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_CRAY_XT)
|
||||
#include <cray/cray_nvidia.h>
|
||||
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
|
||||
@@ -521,7 +503,7 @@ static inline void *nv_vmalloc(unsigned long size)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_vfree(void *ptr, NvU32 size)
|
||||
static inline void nv_vfree(void *ptr, NvU64 size)
|
||||
{
|
||||
NV_MEMDBG_REMOVE(ptr, size);
|
||||
vfree(ptr);
|
||||
@@ -529,7 +511,11 @@ static inline void nv_vfree(void *ptr, NvU32 size)
|
||||
|
||||
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
|
||||
void *ptr = ioremap_driver_hardened(phys, size);
|
||||
#else
|
||||
void *ptr = ioremap(phys, size);
|
||||
#endif
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
@@ -542,11 +528,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
|
||||
|
||||
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if defined(NV_IOREMAP_CACHE_PRESENT)
|
||||
void *ptr = ioremap_cache(phys, size);
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
void *ptr = NULL;
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
|
||||
ptr = ioremap_cache_shared(phys, size);
|
||||
#elif defined(NV_IOREMAP_CACHE_PRESENT)
|
||||
ptr = ioremap_cache(phys, size);
|
||||
#elif defined(NVCPU_PPC64LE)
|
||||
//
|
||||
// ioremap_cache() has been only implemented correctly for ppc64le with
|
||||
@@ -561,25 +547,32 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
|
||||
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
|
||||
// support on power.
|
||||
//
|
||||
void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
|
||||
#else
|
||||
return nv_ioremap(phys, size);
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if defined(NV_IOREMAP_WC_PRESENT)
|
||||
void *ptr = ioremap_wc(phys, size);
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
void *ptr = NULL;
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
|
||||
ptr = ioremap_driver_hardened_wc(phys, size);
|
||||
#elif defined(NV_IOREMAP_WC_PRESENT)
|
||||
ptr = ioremap_wc(phys, size);
|
||||
#else
|
||||
return nv_ioremap_nocache(phys, size);
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_iounmap(void *ptr, NvU64 size)
|
||||
@@ -592,11 +585,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
{
|
||||
if (node_id < 0 || node_id >= MAX_NUMNODES)
|
||||
return NV_FALSE;
|
||||
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
|
||||
return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#else
|
||||
return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_KMALLOC(ptr, size) \
|
||||
@@ -606,6 +595,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KZALLOC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
|
||||
if (ptr) \
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KMALLOC_ATOMIC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
|
||||
@@ -649,6 +645,26 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
free_pages(ptr, order); \
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_sme_clr(pgprot_t prot)
|
||||
{
|
||||
#if defined(__sme_clr)
|
||||
return __pgprot(__sme_clr(pgprot_val(prot)));
|
||||
#else
|
||||
return prot;
|
||||
#endif // __sme_clr
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
|
||||
{
|
||||
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
|
||||
|
||||
#if defined(pgprot_decrypted)
|
||||
return pgprot_decrypted(prot);
|
||||
#else
|
||||
return nv_sme_clr(prot);
|
||||
#endif // pgprot_decrypted
|
||||
}
|
||||
|
||||
#if defined(PAGE_KERNEL_NOENC)
|
||||
#if defined(__pgprot_mask)
|
||||
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
|
||||
@@ -670,7 +686,8 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
|
||||
#if defined(PAGE_KERNEL_NOENC)
|
||||
if (unencrypted)
|
||||
{
|
||||
prot = cached ? PAGE_KERNEL_NOENC : NV_PAGE_KERNEL_NOCACHE_NOENC;
|
||||
prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
|
||||
nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -838,10 +855,8 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
})
|
||||
#endif
|
||||
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
|
||||
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
|
||||
#endif
|
||||
|
||||
#define NV_PRINT_AT(nv_debug_level,at) \
|
||||
@@ -957,26 +972,6 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
|
||||
{
|
||||
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
|
||||
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
|
||||
/*
|
||||
* When AMD memory encryption is enabled, device memory mappings with the
|
||||
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
|
||||
*
|
||||
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
|
||||
*/
|
||||
#if defined(NV_CC_MKDEC_PRESENT)
|
||||
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
|
||||
#else
|
||||
prot = pgprot_decrypted(prot);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
|
||||
NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
|
||||
{
|
||||
@@ -1139,11 +1134,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
#if defined(NVCPU_X86_64)
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
}
|
||||
#endif
|
||||
*stack = sp;
|
||||
return 0;
|
||||
@@ -1152,7 +1150,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (stack != NULL)
|
||||
if (stack != NULL && rm_is_altstack_in_use())
|
||||
{
|
||||
NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
|
||||
}
|
||||
@@ -1197,7 +1195,7 @@ typedef struct nv_alloc_s {
|
||||
NvBool zeroed : 1;
|
||||
NvBool aliased : 1;
|
||||
NvBool user : 1;
|
||||
NvBool node0 : 1;
|
||||
NvBool node : 1;
|
||||
NvBool peer_io : 1;
|
||||
NvBool physical : 1;
|
||||
NvBool unencrypted : 1;
|
||||
@@ -1211,6 +1209,7 @@ typedef struct nv_alloc_s {
|
||||
unsigned int pid;
|
||||
struct page **user_pages;
|
||||
NvU64 guest_id; /* id of guest VM */
|
||||
NvS32 node_id; /* Node id for memory allocation when node is set in flags */
|
||||
void *import_priv;
|
||||
struct sg_table *import_sgt;
|
||||
} nv_alloc_t;
|
||||
@@ -1323,7 +1322,7 @@ nv_dma_maps_swiotlb(struct device *dev)
|
||||
* SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
|
||||
* is active in all cases.
|
||||
*/
|
||||
if (os_sev_enabled)
|
||||
if (os_cc_enabled)
|
||||
swiotlb_in_use = NV_TRUE;
|
||||
#endif
|
||||
|
||||
@@ -1386,8 +1385,7 @@ typedef struct nv_dma_map_s {
|
||||
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
|
||||
* single-page sg elements on Xen Server.
|
||||
*/
|
||||
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
|
||||
!defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#if !defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
|
||||
((sg_alloc_table_from_pages(&sm->sgt, \
|
||||
&dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \
|
||||
@@ -1452,6 +1450,35 @@ struct nv_dma_device {
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
/* Properties of the coherent link */
|
||||
typedef struct coherent_link_info_s {
|
||||
/* Physical Address of the GPU memory in SOC AMAP. In the case of
|
||||
* baremetal OS environment it is System Physical Address(SPA) and in the case
|
||||
* of virutalized OS environment it is Intermediate Physical Address(IPA) */
|
||||
NvU64 gpu_mem_pa;
|
||||
/* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
|
||||
* available for adding GPU memory to the kernel as system RAM */
|
||||
DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
|
||||
} coherent_link_info_t;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
*
|
||||
* This structure retains the pointer to the device,
|
||||
* and any other baggage we want to carry along
|
||||
*
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
nvidia_stack_t *sp;
|
||||
struct acpi_device *device;
|
||||
struct acpi_handle *handle;
|
||||
void *notifier_data;
|
||||
int notify_handler_installed;
|
||||
} nv_acpi_t;
|
||||
#endif
|
||||
|
||||
/* linux-specific version of old nv_state_t */
|
||||
/* this is a general os-specific state structure. the first element *must* be
|
||||
the general state structure, for the generic unix-based code */
|
||||
@@ -1467,6 +1494,13 @@ typedef struct nv_linux_state_s {
|
||||
/* IBM-NPU info associated with this GPU */
|
||||
nv_ibmnpu_info_t *npu;
|
||||
|
||||
/* coherent link information */
|
||||
coherent_link_info_t coherent_link_info;
|
||||
|
||||
/* Dedicated queue to be used for removing FB memory which is onlined
|
||||
* to kernel as a NUMA node. Refer Bug : 3879845*/
|
||||
nv_kthread_q_t remove_numa_memory_q;
|
||||
|
||||
/* NUMA node information for the platforms where GPU memory is presented
|
||||
* as a NUMA node to the kernel */
|
||||
struct {
|
||||
@@ -1477,6 +1511,7 @@ typedef struct nv_linux_state_s {
|
||||
/* NUMA online/offline status for platforms that support GPU memory as
|
||||
* NUMA node */
|
||||
atomic_t status;
|
||||
NvBool use_auto_online;
|
||||
} numa_info;
|
||||
|
||||
nvidia_stack_t *sp[NV_DEV_STACK_COUNT];
|
||||
@@ -1546,8 +1581,13 @@ typedef struct nv_linux_state_s {
|
||||
/* Per-device notifier block for ACPI events */
|
||||
struct notifier_block acpi_nb;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
nv_acpi_t* nv_acpi_object;
|
||||
#endif
|
||||
|
||||
/* Lock serializing ISRs for different SOC vectors */
|
||||
nv_spinlock_t soc_isr_lock;
|
||||
void *soc_bh_mutex;
|
||||
|
||||
struct nv_timer snapshot_timer;
|
||||
nv_spinlock_t snapshot_timer_lock;
|
||||
@@ -1593,24 +1633,6 @@ extern struct rw_semaphore nv_system_pm_lock;
|
||||
|
||||
extern NvBool nv_ats_supported;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
*
|
||||
* This structure retains the pointer to the device,
|
||||
* and any other baggage we want to carry along
|
||||
*
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
nvidia_stack_t *sp;
|
||||
struct acpi_device *device;
|
||||
struct acpi_handle *handle;
|
||||
int notify_handler_installed;
|
||||
} nv_acpi_t;
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* file-private data
|
||||
* hide a pointer to our data structures in a file-private ptr
|
||||
@@ -1667,6 +1689,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
|
||||
|
||||
#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)
|
||||
|
||||
static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
down(&nvlfp->fops_sp_lock[which]);
|
||||
return nvlfp->fops_sp[which];
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
up(&nvlfp->fops_sp_lock[which]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_ATOMIC_READ(data) atomic_read(&(data))
|
||||
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))
|
||||
@@ -1739,6 +1782,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
|
||||
|
||||
extern NvU32 NVreg_EnableUserNUMAManagement;
|
||||
extern NvU32 NVreg_RegisterPCIDriver;
|
||||
extern NvU32 NVreg_EnableResizableBar;
|
||||
|
||||
extern NvU32 num_probed_nv_devices;
|
||||
extern NvU32 num_nv_devices;
|
||||
@@ -1895,20 +1939,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
|
||||
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
|
||||
#endif
|
||||
|
||||
#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
|
||||
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
|
||||
#else
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
|
||||
#endif
|
||||
|
||||
#define MODULE_BASE_NAME "nvidia"
|
||||
#define MODULE_INSTANCE_NUMBER 0
|
||||
#define MODULE_INSTANCE_STRING ""
|
||||
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
|
||||
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
|
||||
|
||||
static inline void nv_mutex_destroy(struct mutex *lock)
|
||||
{
|
||||
@@ -1941,6 +1977,36 @@ static inline int nv_set_numa_status(nv_linux_state_t *nvl, int status)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
|
||||
{
|
||||
return nvl->numa_info.use_auto_online;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
NvU64 base;
|
||||
NvU64 size;
|
||||
NvU32 nodeId;
|
||||
int ret;
|
||||
} remove_numa_memory_info_t;
|
||||
|
||||
static void offline_numa_memory_callback
|
||||
(
|
||||
void *args
|
||||
)
|
||||
{
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
|
||||
pNumaInfo->base,
|
||||
pNumaInfo->size);
|
||||
#else
|
||||
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
|
||||
pNumaInfo->size);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_NUMA_STATUS_DISABLED = 0,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -73,21 +73,4 @@
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
|
||||
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
hlist_for_each_entry(pos, head, member)
|
||||
#else
|
||||
#if !defined(hlist_entry_safe)
|
||||
#define hlist_entry_safe(ptr, type, member) \
|
||||
(ptr) ? hlist_entry(ptr, type, member) : NULL
|
||||
#endif
|
||||
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
|
||||
pos; \
|
||||
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
|
||||
#endif
|
||||
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT
|
||||
|
||||
#endif // __NV_LIST_HELPERS_H__
|
||||
|
||||
@@ -29,17 +29,12 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/sched.h> /* signal_pending, cond_resched */
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
|
||||
#include <linux/sched/signal.h> /* signal_pending for kernels >= 4.11 */
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
|
||||
#include <linux/semaphore.h>
|
||||
#else
|
||||
#include <asm/semaphore.h>
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
typedef raw_spinlock_t nv_spinlock_t;
|
||||
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
|
||||
@@ -62,20 +57,7 @@ typedef spinlock_t nv_spinlock_t;
|
||||
#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
|
||||
#endif
|
||||
|
||||
#if defined(NV_CONFIG_PREEMPT_RT)
|
||||
#define NV_INIT_SEMA(sema, val) sema_init(sema,val)
|
||||
#else
|
||||
#if !defined(__SEMAPHORE_INITIALIZER) && defined(__COMPAT_SEMAPHORE_INITIALIZER)
|
||||
#define __SEMAPHORE_INITIALIZER __COMPAT_SEMAPHORE_INITIALIZER
|
||||
#endif
|
||||
#define NV_INIT_SEMA(sema, val) \
|
||||
{ \
|
||||
struct semaphore __sema = \
|
||||
__SEMAPHORE_INITIALIZER(*(sema), val); \
|
||||
*(sema) = __sema; \
|
||||
}
|
||||
#endif
|
||||
#define NV_INIT_MUTEX(mutex) NV_INIT_SEMA(mutex, 1)
|
||||
#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)
|
||||
|
||||
static inline int nv_down_read_interruptible(struct rw_semaphore *lock)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -29,6 +29,34 @@
|
||||
typedef int vm_fault_t;
|
||||
#endif
|
||||
|
||||
/* pin_user_pages
|
||||
* Presence of pin_user_pages() also implies the presence of unpin-user_page().
|
||||
* Both were added in the v5.6-rc1
|
||||
*
|
||||
* pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
|
||||
*
|
||||
* Removed vmas parameter from pin_user_pages() by commit 40896a02751
|
||||
* ("mm/gup: remove vmas parameter from pin_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT)
|
||||
#if defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages, vmas) \
|
||||
pin_user_pages(start, nr_pages, gup_flags, pages)
|
||||
#endif // NV_PIN_USER_PAGES_HAS_ARGS_VMAS
|
||||
#define NV_UNPIN_USER_PAGE unpin_user_page
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
|
||||
#define NV_UNPIN_USER_PAGE put_page
|
||||
#endif // NV_PIN_USER_PAGES_PRESENT
|
||||
|
||||
/* get_user_pages
|
||||
*
|
||||
* The 8-argument version of get_user_pages was deprecated by commit
|
||||
@@ -45,53 +73,73 @@ typedef int vm_fault_t;
|
||||
* commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
|
||||
* replacement of the write and force parameters with gup_flags
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
|
||||
* ("mm/gup: remove unused vmas parameter from get_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(start, nr_pages, flags, pages)
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
|
||||
return get_user_pages(current, current->mm, start, nr_pages, flags,
|
||||
pages, vmas);
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS)
|
||||
return get_user_pages(start, nr_pages, write, force, pages, vmas);
|
||||
#else
|
||||
// remaining defination(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
|
||||
return get_user_pages(start, nr_pages, flags, pages, vmas);
|
||||
#endif
|
||||
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
return get_user_pages(current, current->mm, start, nr_pages, write,
|
||||
force, pages, vmas);
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS
|
||||
}
|
||||
#endif
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
|
||||
/* pin_user_pages_remote
|
||||
*
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
|
||||
*
|
||||
* pin_user_pages_remote() removed 'tsk' parameter by commit
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11). *
|
||||
*
|
||||
* Removed unused vmas parameter from pin_user_pages_remote() by commit
|
||||
* 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#elif defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_VMAS)
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* get_user_pages_remote() was added by commit 1e9877902dc7
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
|
||||
*
|
||||
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
|
||||
* functions") deprecated the 8-argument version of get_user_pages for the
|
||||
* non-remote case (calling get_user_pages with current and current->mm).
|
||||
*
|
||||
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
|
||||
* version that uses something other than current and current->mm. Use
|
||||
* NV_GET_USER_PAGES if you are refering to current and current->mm.
|
||||
*
|
||||
* Note that get_user_pages_remote() requires the caller to hold a reference on
|
||||
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
|
||||
* This will always be true when using current and current->mm. If the kernel passes
|
||||
@@ -110,69 +158,66 @@ typedef int vm_fault_t;
|
||||
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
|
||||
* all gup code") in v5.9-rc1 (2020-08-11).
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
|
||||
* ("mm/gup: remove vmas parameter from get_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
|
||||
#else
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
|
||||
pages, vmas);
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#else
|
||||
// remaining defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
return get_user_pages_remote(mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS)
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
|
||||
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* The .virtual_address field was effectively renamed to .address, by these
|
||||
@@ -247,4 +292,22 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
|
||||
{
|
||||
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
|
||||
vm_flags_set(vma, flags);
|
||||
#else
|
||||
vma->vm_flags |= flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
|
||||
{
|
||||
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
|
||||
vm_flags_clear(vma, flags);
|
||||
#else
|
||||
vma->vm_flags &= ~flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // __NV_MM_H__
|
||||
|
||||
@@ -27,16 +27,8 @@
|
||||
#include <linux/pci.h>
|
||||
#include "nv-linux.h"
|
||||
|
||||
#if defined(NV_DEV_IS_PCI_PRESENT)
|
||||
#define nv_dev_is_pci(dev) dev_is_pci(dev)
|
||||
#else
|
||||
/*
|
||||
* Non-PCI devices are only supported on kernels which expose the
|
||||
* dev_is_pci() function. For older kernels, we only support PCI
|
||||
* devices, hence returning true to take all the PCI code paths.
|
||||
*/
|
||||
#define nv_dev_is_pci(dev) (true)
|
||||
#endif
|
||||
#define NV_GPU_BAR1 1
|
||||
#define NV_GPU_BAR3 3
|
||||
|
||||
int nv_pci_register_driver(void);
|
||||
void nv_pci_unregister_driver(void);
|
||||
|
||||
@@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
|
||||
|
||||
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
|
||||
#if defined(NVCPU_AARCH64)
|
||||
#if defined(NV_MT_DEVICE_GRE_PRESENT)
|
||||
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
|
||||
PTE_ATTRINDX(MT_DEVICE_GRE))
|
||||
#else
|
||||
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
|
||||
PTE_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
#endif
|
||||
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
|
||||
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
|
||||
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)
|
||||
|
||||
@@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
__entry; \
|
||||
})
|
||||
|
||||
/*
|
||||
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
|
||||
* Use the older interface instead unless the newer interface is necessary.
|
||||
*/
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
proc_mkdir_mode(name, mode, parent)
|
||||
#else
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
({ \
|
||||
struct proc_dir_entry *__entry; \
|
||||
__entry = create_proc_entry(name, mode, parent); \
|
||||
__entry; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define NV_CREATE_PROC_DIR(name,parent) \
|
||||
({ \
|
||||
@@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
#define NV_PDE_DATA(inode) PDE_DATA(inode)
|
||||
#endif
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
proc_remove(entry);
|
||||
#else
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
#endif
|
||||
|
||||
void nv_procfs_unregister_all(struct proc_dir_entry *entry,
|
||||
struct proc_dir_entry *delimiter);
|
||||
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
|
||||
static int nv_procfs_open_##name( \
|
||||
struct inode *inode, \
|
||||
|
||||
@@ -54,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
|
||||
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
|
||||
void nv_free_system_pages (nv_alloc_t *);
|
||||
|
||||
void nv_address_space_init_once (struct address_space *mapping);
|
||||
|
||||
int nv_uvm_init (void);
|
||||
void nv_uvm_exit (void);
|
||||
NV_STATUS nv_uvm_suspend (void);
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <nvstatus.h>
|
||||
#include "nv_stdarg.h"
|
||||
#include <nv-caps.h>
|
||||
#include <nv-firmware.h>
|
||||
#include <nv-ioctl.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
@@ -160,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_MAUD,
|
||||
TEGRASOC_WHICH_CLK_AZA_2XBIT,
|
||||
TEGRASOC_WHICH_CLK_AZA_BIT,
|
||||
TEGRA234_CLK_MIPI_CAL,
|
||||
TEGRA234_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_SOR0_DIV,
|
||||
TEGRASOC_WHICH_CLK_DISP_ROOT,
|
||||
TEGRASOC_WHICH_CLK_HUB_ROOT,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISP,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
|
||||
TEGRASOC_WHICH_CLK_PLLA,
|
||||
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
|
||||
} TEGRASOC_WHICH_CLK;
|
||||
|
||||
@@ -304,10 +311,11 @@ typedef struct nv_alloc_mapping_context_s {
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_SOC_IRQ_DISPLAY_TYPE,
|
||||
NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
|
||||
NV_SOC_IRQ_DPAUX_TYPE,
|
||||
NV_SOC_IRQ_GPIO_TYPE,
|
||||
NV_SOC_IRQ_HDACODEC_TYPE,
|
||||
NV_SOC_IRQ_TCPC2DISP_TYPE,
|
||||
NV_SOC_IRQ_INVALID_TYPE
|
||||
} nv_soc_irq_type_t;
|
||||
|
||||
@@ -322,6 +330,7 @@ typedef struct nv_soc_irq_info_s {
|
||||
NvU32 gpio_num;
|
||||
NvU32 dpaux_instance;
|
||||
} irq_data;
|
||||
NvS32 ref_count;
|
||||
} nv_soc_irq_info_t;
|
||||
|
||||
#define NV_MAX_SOC_IRQS 6
|
||||
@@ -338,6 +347,12 @@ typedef struct nv_soc_irq_info_s {
|
||||
/* DMA-capable device data, defined by kernel interface layer */
|
||||
typedef struct nv_dma_device nv_dma_device_t;
|
||||
|
||||
typedef struct nv_phys_addr_range
|
||||
{
|
||||
NvU64 addr;
|
||||
NvU64 len;
|
||||
} nv_phys_addr_range_t;
|
||||
|
||||
typedef struct nv_state_t
|
||||
{
|
||||
void *priv; /* private data */
|
||||
@@ -368,6 +383,7 @@ typedef struct nv_state_t
|
||||
nv_aperture_t *mipical_regs;
|
||||
nv_aperture_t *fb, ud;
|
||||
nv_aperture_t *simregs;
|
||||
nv_aperture_t *emc_regs;
|
||||
|
||||
NvU32 num_dpaux_instance;
|
||||
NvU32 interrupt_line;
|
||||
@@ -376,9 +392,11 @@ typedef struct nv_state_t
|
||||
NvS32 current_soc_irq;
|
||||
NvU32 num_soc_irqs;
|
||||
NvU32 hdacodec_irq;
|
||||
NvU32 tcpc2disp_irq;
|
||||
NvU8 *soc_dcb_blob;
|
||||
NvU32 soc_dcb_size;
|
||||
NvU32 disp_sw_soc_chip_id;
|
||||
NvBool soc_is_dpalt_mode_supported;
|
||||
|
||||
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
|
||||
NvU32 igpu_nonstall_irq;
|
||||
@@ -430,9 +448,6 @@ typedef struct nv_state_t
|
||||
/* Variable to force allocation of 32-bit addressable memory */
|
||||
NvBool force_dma32_alloc;
|
||||
|
||||
/* Variable to track if device has entered dynamic power state */
|
||||
NvBool dynamic_power_entered;
|
||||
|
||||
/* PCI power state should be D0 during system suspend */
|
||||
NvBool d0_state_in_suspend;
|
||||
|
||||
@@ -457,6 +472,9 @@ typedef struct nv_state_t
|
||||
/* Bool to check if ISO iommu enabled */
|
||||
NvBool iso_iommu_present;
|
||||
|
||||
/* Bool to check if NISO iommu enabled */
|
||||
NvBool niso_iommu_present;
|
||||
|
||||
/* Bool to check if dma-buf is supported */
|
||||
NvBool dma_buf_supported;
|
||||
|
||||
@@ -465,6 +483,11 @@ typedef struct nv_state_t
|
||||
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
|
||||
NvBool nvpcf_dsm_in_gpu_scope;
|
||||
|
||||
/* Bool to check if the device received a shutdown notification */
|
||||
NvBool is_shutdown;
|
||||
|
||||
/* Bool to check if the GPU has a coherent sysmem link */
|
||||
NvBool coherent;
|
||||
} nv_state_t;
|
||||
|
||||
// These define need to be in sync with defines in system.h
|
||||
@@ -473,6 +496,10 @@ typedef struct nv_state_t
|
||||
#define OS_TYPE_SUNOS 0x3
|
||||
#define OS_TYPE_VMWARE 0x4
|
||||
|
||||
#define NVFP_TYPE_NONE 0x0
|
||||
#define NVFP_TYPE_REFCOUNTED 0x1
|
||||
#define NVFP_TYPE_REGISTERED 0x2
|
||||
|
||||
struct nv_file_private_t
|
||||
{
|
||||
NvHandle *handles;
|
||||
@@ -482,12 +509,21 @@ struct nv_file_private_t
|
||||
|
||||
nv_file_private_t *ctl_nvfp;
|
||||
void *ctl_nvfp_priv;
|
||||
NvU32 register_or_refcount;
|
||||
|
||||
//
|
||||
// True if a client or an event was ever allocated on this fd.
|
||||
// If false, RMAPI cleanup is skipped.
|
||||
//
|
||||
NvBool bCleanupRmapi;
|
||||
};
|
||||
|
||||
// Forward define the gpu ops structures
|
||||
typedef struct gpuSession *nvgpuSessionHandle_t;
|
||||
typedef struct gpuDevice *nvgpuDeviceHandle_t;
|
||||
typedef struct gpuAddressSpace *nvgpuAddressSpaceHandle_t;
|
||||
typedef struct gpuTsg *nvgpuTsgHandle_t;
|
||||
typedef struct UvmGpuTsgAllocParams_tag nvgpuTsgAllocParams_t;
|
||||
typedef struct gpuChannel *nvgpuChannelHandle_t;
|
||||
typedef struct UvmGpuChannelInfo_tag *nvgpuChannelInfo_t;
|
||||
typedef struct UvmGpuChannelAllocParams_tag nvgpuChannelAllocParams_t;
|
||||
@@ -513,8 +549,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
|
||||
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
|
||||
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
|
||||
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
|
||||
/*
|
||||
* flags
|
||||
@@ -566,12 +603,6 @@ typedef enum
|
||||
NV_POWER_STATE_RUNNING
|
||||
} nv_power_state_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_GSP,
|
||||
NV_FIRMWARE_GSP_LOG
|
||||
} nv_firmware_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
|
||||
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
|
||||
@@ -587,12 +618,8 @@ typedef enum
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iso_iommu_present)
|
||||
|
||||
/*
|
||||
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
|
||||
* AC/DC event.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT 0x8002
|
||||
|
||||
#define NV_SOC_IS_NISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->niso_iommu_present)
|
||||
/*
|
||||
* GPU add/remove events
|
||||
*/
|
||||
@@ -604,8 +631,6 @@ typedef enum
|
||||
* to core NVIDIA driver for ACPI events.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY 1
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED 1
|
||||
|
||||
@@ -616,14 +641,18 @@ typedef enum
|
||||
#define NV_EVAL_ACPI_METHOD_NVIF 0x01
|
||||
#define NV_EVAL_ACPI_METHOD_WMMX 0x02
|
||||
|
||||
#define NV_I2C_CMD_READ 1
|
||||
#define NV_I2C_CMD_WRITE 2
|
||||
#define NV_I2C_CMD_SMBUS_READ 3
|
||||
#define NV_I2C_CMD_SMBUS_WRITE 4
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
|
||||
typedef enum {
|
||||
NV_I2C_CMD_READ = 1,
|
||||
NV_I2C_CMD_WRITE,
|
||||
NV_I2C_CMD_SMBUS_READ,
|
||||
NV_I2C_CMD_SMBUS_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_WRITE,
|
||||
NV_I2C_CMD_BLOCK_READ,
|
||||
NV_I2C_CMD_BLOCK_WRITE
|
||||
} nv_i2c_cmd_t;
|
||||
|
||||
// Flags needed by OSAllocPagesNode
|
||||
#define NV_ALLOC_PAGES_NODE_NONE 0x0
|
||||
@@ -636,27 +665,38 @@ typedef enum
|
||||
#define NV_GET_NV_STATE(pGpu) \
|
||||
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
|
||||
|
||||
#define IS_REG_OFFSET(nv, offset, length) \
|
||||
(((offset) >= (nv)->regs->cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= \
|
||||
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
|
||||
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((offset >= nv->regs->cpu_address) &&
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
|
||||
}
|
||||
|
||||
#define IS_FB_OFFSET(nv, offset, length) \
|
||||
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
|
||||
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->fb) && (nv->fb->size != 0) &&
|
||||
(offset >= nv->fb->cpu_address) &&
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
|
||||
}
|
||||
|
||||
#define IS_UD_OFFSET(nv, offset, length) \
|
||||
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
|
||||
((offset) >= (nv)->ud.cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
|
||||
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
|
||||
(offset >= nv->ud.cpu_address) &&
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
|
||||
}
|
||||
|
||||
#define IS_IMEM_OFFSET(nv, offset, length) \
|
||||
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
|
||||
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
|
||||
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
|
||||
(((offset) + ((length) - 1)) <= \
|
||||
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
|
||||
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
|
||||
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
|
||||
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
|
||||
}
|
||||
|
||||
#define NV_RM_MAX_MSIX_LINES 8
|
||||
|
||||
@@ -725,7 +765,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
|
||||
@@ -787,13 +827,14 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
|
||||
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
|
||||
|
||||
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
|
||||
void NV_API_CALL nv_put_firmware(const void *);
|
||||
|
||||
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
|
||||
void NV_API_CALL nv_put_file_private(void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
|
||||
@@ -828,6 +869,7 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
|
||||
int NV_API_CALL nv_cap_drv_init(void);
|
||||
void NV_API_CALL nv_cap_drv_exit(void);
|
||||
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
|
||||
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
@@ -900,6 +942,7 @@ NV_STATUS NV_API_CALL rm_write_registry_string (nvidia_stack_t *, nv_state_t *
|
||||
void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *);
|
||||
char* NV_API_CALL rm_remove_spaces (const char *);
|
||||
char* NV_API_CALL rm_string_token (char **, const char);
|
||||
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);
|
||||
@@ -916,11 +959,13 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,
|
||||
|
||||
void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
|
||||
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
|
||||
|
||||
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
|
||||
|
||||
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
|
||||
@@ -931,19 +976,19 @@ NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, N
|
||||
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU32, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
|
||||
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
|
||||
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
|
||||
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
|
||||
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
|
||||
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);
|
||||
@@ -969,12 +1014,14 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
|
||||
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
@@ -987,6 +1034,10 @@ NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_acces
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
|
||||
|
||||
#if defined(NV_VMWARE)
|
||||
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
|
||||
#endif
|
||||
|
||||
/* Callbacks should occur roughly every 10ms. */
|
||||
#define NV_SNAPSHOT_TIMER_HZ 100
|
||||
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
|
||||
@@ -998,6 +1049,16 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
|
||||
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
|
||||
}
|
||||
|
||||
/* nano second resolution timer callback structure */
|
||||
typedef struct nv_nano_timer nv_nano_timer_t;
|
||||
|
||||
/* nano timer functions */
|
||||
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
|
||||
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
|
||||
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
|
||||
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
|
||||
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
|
||||
static inline NvU64 nv_rdtsc(void)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -327,14 +327,18 @@ NV_STATUS nvUvmInterfaceGetPmaObject(uvmGpuDeviceHandle device,
|
||||
|
||||
// Mirrors pmaEvictPagesCb_t, see its documentation in pma.h.
|
||||
typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
NvU64 *pPages,
|
||||
NvU32 count,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd);
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfacePmaRegisterEvictionCallbacks
|
||||
@@ -386,7 +390,7 @@ void nvUvmInterfacePmaUnregisterEvictionCallbacks(void *pPma);
|
||||
*/
|
||||
NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
UvmPmaAllocationOptions *pPmaAllocOptions,
|
||||
NvU64 *pPages);
|
||||
|
||||
@@ -415,7 +419,7 @@ NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
|
||||
NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
NvU32 flags);
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -443,7 +447,7 @@ NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
|
||||
NV_STATUS nvUvmInterfacePmaUnpinPages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize);
|
||||
NvU64 pageSize);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceMemoryFree
|
||||
@@ -484,7 +488,7 @@ void nvUvmInterfaceMemoryFree(uvmGpuAddressSpaceHandle vaSpace,
|
||||
void nvUvmInterfacePmaFreePages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
NvU32 flags);
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -503,7 +507,7 @@ void nvUvmInterfacePmaFreePages(void *pPma,
|
||||
NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
UvmGpuPointer gpuPointer,
|
||||
NvLength length, void **cpuPtr,
|
||||
NvU32 pageSize);
|
||||
NvU64 pageSize);
|
||||
|
||||
/*******************************************************************************
|
||||
uvmGpuMemoryCpuUnmap
|
||||
@@ -513,16 +517,59 @@ NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
void *cpuPtr);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceTsgAllocate
|
||||
|
||||
This function allocates a Time-Slice Group (TSG).
|
||||
|
||||
allocParams must contain an engineIndex as TSGs need to be bound to an
|
||||
engine type at allocation time. The possible values are [0,
|
||||
UVM_COPY_ENGINE_COUNT_MAX) for CE engine type. Notably only the copy engines
|
||||
that have UvmGpuCopyEngineCaps::supported set to true can be allocated.
|
||||
|
||||
Note that TSG is not supported on all GPU architectures for all engine
|
||||
types, e.g., pre-Volta GPUs only support TSG for the GR/Compute engine type.
|
||||
On devices that do not support HW TSGs on the requested engine, this API is
|
||||
still required, i.e., a TSG handle is required in
|
||||
nvUvmInterfaceChannelAllocate(), due to information stored in it necessary
|
||||
for channel allocation. However, when HW TSGs aren't supported, a TSG handle
|
||||
is essentially a "fake" TSG with no HW scheduling impact.
|
||||
|
||||
tsg is filled with the address of the corresponding TSG handle.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - VA space linked to a client and a device under which
|
||||
the TSG is allocated.
|
||||
allocParams[IN] - structure with allocation settings.
|
||||
tsg[OUT] - pointer to the new TSG handle.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
NV_ERR_NO_MEMORY
|
||||
NV_ERR_NOT_SUPPORTED
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceTsgAllocate(uvmGpuAddressSpaceHandle vaSpace,
|
||||
const UvmGpuTsgAllocParams *allocParams,
|
||||
uvmGpuTsgHandle *tsg);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceTsgDestroy
|
||||
|
||||
This function destroys a given TSG.
|
||||
|
||||
Arguments:
|
||||
tsg[IN] - Tsg handle
|
||||
*/
|
||||
void nvUvmInterfaceTsgDestroy(uvmGpuTsgHandle tsg);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceChannelAllocate
|
||||
|
||||
This function will allocate a channel bound to a copy engine
|
||||
This function will allocate a channel bound to a copy engine(CE) or a SEC2
|
||||
engine.
|
||||
|
||||
allocParams must contain an engineIndex as channels need to be bound to an
|
||||
engine type at allocation time. The possible values are [0,
|
||||
UVM_COPY_ENGINE_COUNT_MAX), but notably only the copy engines that have
|
||||
UvmGpuCopyEngineCaps::supported set to true can be allocated. This struct
|
||||
also contains information relative to GPFIFO and GPPut.
|
||||
allocParams contains information relative to GPFIFO and GPPut.
|
||||
|
||||
channel is filled with the address of the corresponding channel handle.
|
||||
|
||||
@@ -532,17 +579,18 @@ void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
Host channel submission doorbell.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - VA space linked to a client and a device under which
|
||||
the channel will be allocated
|
||||
tsg[IN] - Time-Slice Group that the channel will be a member.
|
||||
allocParams[IN] - structure with allocation settings
|
||||
channel[OUT] - pointer to the new channel handle
|
||||
channelInfo[OUT] - structure filled with channel information
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
NV_ERR_NO_MEMORY
|
||||
NV_ERR_NOT_SUPPORTED
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NV_STATUS nvUvmInterfaceChannelAllocate(const uvmGpuTsgHandle tsg,
|
||||
const UvmGpuChannelAllocParams *allocParams,
|
||||
uvmGpuChannelHandle *channel,
|
||||
UvmGpuChannelInfo *channelInfo);
|
||||
@@ -550,7 +598,7 @@ NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceChannelDestroy
|
||||
|
||||
This function destroys a given channel
|
||||
This function destroys a given channel.
|
||||
|
||||
Arguments:
|
||||
channel[IN] - channel handle
|
||||
@@ -571,7 +619,7 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
|
||||
NV_ERR_NO_MEMORY
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
|
||||
UvmGpuCaps * caps);
|
||||
UvmGpuCaps *caps);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceQueryCopyEnginesCaps
|
||||
@@ -917,6 +965,23 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
void *pFaultBuffer,
|
||||
NvU32 *numFaults);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceFlushReplayableFaultBuffer
|
||||
|
||||
This function sends an RPC to GSP in order to flush the HW replayable fault buffer.
|
||||
|
||||
NOTES:
|
||||
- This function DOES NOT acquire the RM API or GPU locks. That is because
|
||||
it is called during fault servicing, which could produce deadlocks.
|
||||
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceInitAccessCntrInfo
|
||||
|
||||
@@ -925,13 +990,15 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
pAccessCntrInfo[OUT] - Information provided by RM for access counter handling
|
||||
accessCntrIndex[IN] - Access counter index
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceInitAccessCntrInfo(uvmGpuDeviceHandle device,
|
||||
UvmGpuAccessCntrInfo *pAccessCntrInfo);
|
||||
UvmGpuAccessCntrInfo *pAccessCntrInfo,
|
||||
NvU32 accessCntrIndex);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceDestroyAccessCntrInfo
|
||||
@@ -1050,11 +1117,13 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
hMemory[IN] - Memory handle.
|
||||
offset [IN] - Offset from the beginning of the allocation
|
||||
where PTE mappings should begin.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size [IN] - Length of the allocation for which PTEs
|
||||
should be built.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size = 0 will be interpreted as the total size
|
||||
of the allocation.
|
||||
@@ -1360,8 +1429,6 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
a. pre-allocated stack
|
||||
b. the fact that internal RPC infrastructure doesn't acquire GPU lock.
|
||||
Therefore, locking is the caller's responsibility.
|
||||
- This function DOES NOT sleep (does not allocate memory or acquire locks)
|
||||
so it can be invoked while holding a spinlock.
|
||||
|
||||
Arguments:
|
||||
channel[IN] - paging channel handle obtained via
|
||||
@@ -1381,4 +1448,243 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
/*******************************************************************************
|
||||
CSL Interface and Locking
|
||||
|
||||
The following functions do not acquire the RM API or GPU locks and must not be called
|
||||
concurrently with the same UvmCslContext parameter in different threads. The caller must
|
||||
guarantee this exclusion.
|
||||
|
||||
* nvUvmInterfaceCslRotateIv
|
||||
* nvUvmInterfaceCslEncrypt
|
||||
* nvUvmInterfaceCslDecrypt
|
||||
* nvUvmInterfaceCslSign
|
||||
* nvUvmInterfaceCslQueryMessagePool
|
||||
* nvUvmInterfaceCslIncrementIv
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslInitContext
|
||||
|
||||
Allocates and initializes a CSL context for a given secure channel.
|
||||
|
||||
The lifetime of the context is the same as the lifetime of the secure channel
|
||||
it is paired with.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
channel[IN] - Handle to a secure channel.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_STATE - The system is not operating in Confidential Compute mode.
|
||||
NV_ERR_INVALID_CHANNEL - The associated channel is not a secure channel.
|
||||
NV_ERR_IN_USE - The context has already been initialized.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
|
||||
uvmGpuChannelHandle channel);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceDeinitCslContext
|
||||
|
||||
Securely deinitializes and clears the contents of a context.
|
||||
|
||||
If context is already deinitialized then function returns immediately.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN] - The CSL context.
|
||||
*/
|
||||
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslRotateIv
|
||||
|
||||
Rotates the IV for a given channel and operation.
|
||||
|
||||
This function will rotate the IV on both the CPU and the GPU.
|
||||
Outstanding messages that have been encrypted by the GPU should first be
|
||||
decrypted before calling this function with operation equal to
|
||||
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
|
||||
encrypted by the CPU should first be decrypted before calling this function
|
||||
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
|
||||
the channel must be idle before calling this function. This function can be
|
||||
called regardless of the value of the IV's message counter.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
operation[IN] - Either
|
||||
- UVM_CSL_OPERATION_ENCRYPT
|
||||
- UVM_CSL_OPERATION_DECRYPT
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
|
||||
to overflow.
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
|
||||
UvmCslOperation operation);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslEncrypt
|
||||
|
||||
Encrypts data and produces an authentication tag.
|
||||
|
||||
Auth, input, and output buffers must not overlap. If they do then calling
|
||||
this function produces undefined behavior. Performance is typically
|
||||
maximized when the input and output buffers are 16-byte aligned. This is
|
||||
natural alignment for AES block.
|
||||
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
|
||||
However, it is optional. If it is NULL, the next IV in line will be used.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[IN] - Size of the input and output buffers in
|
||||
units of bytes. Value can range from 1 byte
|
||||
to (2^32) - 1 bytes.
|
||||
inputBuffer[IN] - Address of plaintext input buffer.
|
||||
encryptIv[IN/OUT] - IV to use for encryption. Can be NULL.
|
||||
outputBuffer[OUT] - Address of ciphertext output buffer.
|
||||
authTagBuffer[OUT] - Address of authentication tag buffer.
|
||||
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
- The encryptIv has already been used.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
NvU8 const *inputBuffer,
|
||||
UvmCslIv *encryptIv,
|
||||
NvU8 *outputBuffer,
|
||||
NvU8 *authTagBuffer);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslDecrypt
|
||||
|
||||
Verifies the authentication tag and decrypts data.
|
||||
|
||||
Auth, input, and output buffers must not overlap. If they do then calling
|
||||
this function produces undefined behavior. Performance is typically
|
||||
maximized when the input and output buffers are 16-byte aligned. This is
|
||||
natural alignment for AES block.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[IN] - Size of the input and output buffers in units of bytes.
|
||||
Value can range from 1 byte to (2^32) - 1 bytes.
|
||||
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
|
||||
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
|
||||
internal counter is used.
|
||||
inputBuffer[IN] - Address of ciphertext input buffer.
|
||||
outputBuffer[OUT] - Address of plaintext output buffer.
|
||||
addAuthData[IN] - Address of the plaintext additional authenticated data used to
|
||||
calculate the authentication tag. Can be NULL.
|
||||
addAuthDataSize[IN] - Size of the additional authenticated data in units of bytes.
|
||||
Value can range from 1 byte to (2^32) - 1 bytes.
|
||||
This parameter is ignored if addAuthData is NULL.
|
||||
authTagBuffer[IN] - Address of authentication tag buffer.
|
||||
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The decryption operation would cause a
|
||||
counter overflow to occur.
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
NV_ERR_INVALID_DATA - Verification of the authentication tag fails.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
NvU8 const *inputBuffer,
|
||||
UvmCslIv const *decryptIv,
|
||||
NvU8 *outputBuffer,
|
||||
NvU8 const *addAuthData,
|
||||
NvU32 addAuthDataSize,
|
||||
NvU8 const *authTagBuffer);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslSign
|
||||
|
||||
Generates an authentication tag for secure work launch.
|
||||
|
||||
Auth and input buffers must not overlap. If they do then calling this function produces
|
||||
undefined behavior.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[IN] - Size of the input buffer in units of bytes.
|
||||
Value can range from 1 byte to (2^32) - 1 bytes.
|
||||
inputBuffer[IN] - Address of plaintext input buffer.
|
||||
authTagBuffer[OUT] - Address of authentication tag buffer.
|
||||
Its size is UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The signing operation would cause a counter overflow to occur.
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
NvU8 const *inputBuffer,
|
||||
NvU8 *authTagBuffer);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslQueryMessagePool
|
||||
|
||||
Returns the number of messages that can be encrypted before the message counter will overflow.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
operation[IN] - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
|
||||
messageNum[OUT] - Number of messages left before overflow.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
|
||||
UvmCslOperation operation,
|
||||
NvU64 *messageNum);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslIncrementIv
|
||||
|
||||
Increments the message counter by the specified amount.
|
||||
|
||||
If iv is non-NULL then the incremented value is returned.
|
||||
If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
|
||||
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
|
||||
the returned IV can be used in nvUvmInterfaceCslDecrypt.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
operation[IN] - Either
|
||||
- UVM_CSL_OPERATION_ENCRYPT
|
||||
- UVM_CSL_OPERATION_DECRYPT
|
||||
increment[IN] - The amount by which the IV is incremented. Can be 0.
|
||||
iv[out] - If non-NULL, a buffer to store the incremented IV.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
|
||||
in an overflow.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
|
||||
UvmCslOperation operation,
|
||||
NvU64 increment,
|
||||
UvmCslIv *iv);
|
||||
|
||||
#endif // _NV_UVM_INTERFACE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -92,6 +92,7 @@ typedef unsigned long long UvmGpuPointer;
|
||||
typedef struct uvmGpuSession_tag *uvmGpuSessionHandle; // gpuSessionHandle
|
||||
typedef struct uvmGpuDevice_tag *uvmGpuDeviceHandle; // gpuDeviceHandle
|
||||
typedef struct uvmGpuAddressSpace_tag *uvmGpuAddressSpaceHandle; // gpuAddressSpaceHandle
|
||||
typedef struct uvmGpuTsg_tag *uvmGpuTsgHandle; // gpuTsgHandle
|
||||
typedef struct uvmGpuChannel_tag *uvmGpuChannelHandle; // gpuChannelHandle
|
||||
typedef struct uvmGpuCopyEngine_tag *uvmGpuCopyEngineHandle; // gpuObjectHandle
|
||||
|
||||
@@ -110,7 +111,7 @@ typedef struct UvmGpuMemoryInfo_tag
|
||||
NvBool deviceDescendant;
|
||||
|
||||
// Out: Page size associated with the phys alloc.
|
||||
NvU32 pageSize;
|
||||
NvU64 pageSize;
|
||||
|
||||
// Out: Set to TRUE, if the allocation is contiguous.
|
||||
NvBool contig;
|
||||
@@ -280,6 +281,16 @@ typedef struct UvmGpuChannelInfo_tag
|
||||
// to kick off the new work.
|
||||
//
|
||||
volatile NvU32 *pWorkSubmissionToken;
|
||||
|
||||
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
|
||||
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
|
||||
NvU64 gpFifoGpuVa;
|
||||
NvU64 gpPutGpuVa;
|
||||
NvU64 gpGetGpuVa;
|
||||
// GPU VA of work submission offset is needed in Confidential Computing
|
||||
// so CE channels can ring doorbell of other channels as required for
|
||||
// WLC/LCIC work submission
|
||||
NvU64 workSubmissionOffsetGpuVa;
|
||||
} UvmGpuChannelInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -292,6 +303,17 @@ typedef enum
|
||||
UVM_BUFFER_LOCATION_VID = 2,
|
||||
} UVM_BUFFER_LOCATION;
|
||||
|
||||
typedef struct UvmGpuTsgAllocParams_tag
|
||||
{
|
||||
// Interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
|
||||
// Index of the engine the TSG is bound to.
|
||||
// Ignored if engineType is anything other than
|
||||
// UVM_GPU_CHANNEL_ENGINE_TYPE_CE.
|
||||
NvU32 engineIndex;
|
||||
} UvmGpuTsgAllocParams;
|
||||
|
||||
typedef struct UvmGpuChannelAllocParams_tag
|
||||
{
|
||||
NvU32 numGpFifoEntries;
|
||||
@@ -299,13 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
|
||||
// The next two fields store UVM_BUFFER_LOCATION values
|
||||
NvU32 gpFifoLoc;
|
||||
NvU32 gpPutLoc;
|
||||
|
||||
// Index of the engine the channel will be bound to
|
||||
// ignored if engineType is anything other than UVM_GPU_CHANNEL_ENGINE_TYPE_CE
|
||||
NvU32 engineIndex;
|
||||
|
||||
// interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
} UvmGpuChannelAllocParams;
|
||||
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag
|
||||
@@ -376,40 +391,16 @@ typedef enum
|
||||
|
||||
typedef struct UvmGpuCaps_tag
|
||||
{
|
||||
NvU32 sysmemLink; // UVM_LINK_TYPE
|
||||
NvU32 sysmemLinkRateMBps; // See UvmGpuP2PCapsParams::totalLinkLineRateMBps
|
||||
// If numaEnabled is NV_TRUE, then the system address of allocated GPU
|
||||
// memory can be converted to struct pages. See
|
||||
// UvmGpuInfo::systemMemoryWindowStart.
|
||||
NvBool numaEnabled;
|
||||
NvU32 numaNodeId;
|
||||
|
||||
// On ATS systems, GPUs connected to different CPU sockets can have peer
|
||||
// traffic. They are called indirect peers. However, indirect peers are
|
||||
// mapped using sysmem aperture. In order to disambiguate the location of a
|
||||
// specific memory address, each GPU maps its memory to a different window
|
||||
// in the System Physical Address (SPA) space. The following fields contain
|
||||
// the base + size of such window for the GPU. systemMemoryWindowSize
|
||||
// different than 0 indicates that the window is valid.
|
||||
//
|
||||
// - If the window is valid, then we can map GPU memory to the CPU as
|
||||
// cache-coherent by adding the GPU address to the window start.
|
||||
// - If numaEnabled is NV_TRUE, then we can also convert the system
|
||||
// addresses of allocated GPU memory to struct pages.
|
||||
//
|
||||
// TODO: Bug 1986868: fix window start computation for SIMICS
|
||||
NvU64 systemMemoryWindowStart;
|
||||
NvU64 systemMemoryWindowSize;
|
||||
|
||||
// This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
|
||||
// all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
|
||||
// nvswitchMemoryWindowStart tells the base address for the GPU in the
|
||||
// NVSwitch address space. It is used when creating PTEs of memory mappings
|
||||
// to NVSwitch peers.
|
||||
NvBool connectedToSwitch;
|
||||
NvU64 nvswitchMemoryWindowStart;
|
||||
} UvmGpuCaps;
|
||||
|
||||
typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
{
|
||||
NvU32 bigPageSize;
|
||||
NvU64 bigPageSize;
|
||||
|
||||
NvBool atsEnabled;
|
||||
|
||||
@@ -430,12 +421,14 @@ typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
typedef struct UvmGpuAllocInfo_tag
|
||||
{
|
||||
NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested
|
||||
NvU32 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 alignment; // Virtual alignment
|
||||
NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation
|
||||
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
|
||||
NvBool bPersistentVidmem; // Causes RM to allocate persistent video memory
|
||||
NvHandle hPhysHandle; // Handle for phys allocation either provided or retrieved
|
||||
NvBool bUnprotected; // Allocation to be made in unprotected memory whenever
|
||||
// SEV or GPU CC modes are enabled. Ignored otherwise
|
||||
} UvmGpuAllocInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -516,6 +509,13 @@ typedef struct UvmGpuExternalMappingInfo_tag
|
||||
// In: Size of the buffer to store PTEs (in bytes).
|
||||
NvU64 pteBufferSize;
|
||||
|
||||
// In: Page size for mapping
|
||||
// If this field is passed as 0, the page size
|
||||
// of the allocation is used for mapping.
|
||||
// nvUvmInterfaceGetExternalAllocPtes must pass
|
||||
// this field as zero.
|
||||
NvU64 mappingPageSize;
|
||||
|
||||
// In: Pointer to a buffer to store PTEs.
|
||||
// Out: The interface will fill the buffer with PTEs
|
||||
NvU64 *pteBuffer;
|
||||
@@ -566,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
|
||||
// Out: ATS (Address Translation Services) is supported
|
||||
NvBool atsSupported;
|
||||
|
||||
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
|
||||
NvBool sevEnabled;
|
||||
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
|
||||
// is enabled in the VM, indicating that Confidential Computing must be
|
||||
// also enabled in the GPU(s); these two security features are either both
|
||||
// enabled, or both disabled.
|
||||
NvBool confComputingEnabled;
|
||||
} UvmPlatformInfo;
|
||||
|
||||
typedef struct UvmGpuClientInfo_tag
|
||||
@@ -577,6 +580,20 @@ typedef struct UvmGpuClientInfo_tag
|
||||
NvHandle hSmcPartRef;
|
||||
} UvmGpuClientInfo;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_GPU_CONF_COMPUTE_MODE_NONE,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_APM,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_HCC,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_COUNT
|
||||
} UvmGpuConfComputeMode;
|
||||
|
||||
typedef struct UvmGpuConfComputeCaps_tag
|
||||
{
|
||||
// Out: GPU's confidential compute mode
|
||||
UvmGpuConfComputeMode mode;
|
||||
} UvmGpuConfComputeCaps;
|
||||
|
||||
#define UVM_GPU_NAME_LENGTH 0x40
|
||||
|
||||
typedef struct UvmGpuInfo_tag
|
||||
@@ -641,6 +658,31 @@ typedef struct UvmGpuInfo_tag
|
||||
|
||||
UvmGpuClientInfo smcUserClientInfo;
|
||||
|
||||
// Confidential Compute capabilities of this GPU
|
||||
UvmGpuConfComputeCaps gpuConfComputeCaps;
|
||||
|
||||
// UVM_LINK_TYPE
|
||||
NvU32 sysmemLink;
|
||||
|
||||
// See UvmGpuP2PCapsParams::totalLinkLineRateMBps
|
||||
NvU32 sysmemLinkRateMBps;
|
||||
|
||||
// On coherent systems each GPU maps its memory to a window in the System
|
||||
// Physical Address (SPA) space. The following fields describe that window.
|
||||
//
|
||||
// systemMemoryWindowSize > 0 indicates that the window is valid. meaning
|
||||
// that GPU memory can be mapped by the CPU as cache-coherent by adding the
|
||||
// GPU address to the window start.
|
||||
NvU64 systemMemoryWindowStart;
|
||||
NvU64 systemMemoryWindowSize;
|
||||
|
||||
// This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
|
||||
// all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
|
||||
// nvswitchMemoryWindowStart tells the base address for the GPU in the
|
||||
// NVSwitch address space. It is used when creating PTEs of memory mappings
|
||||
// to NVSwitch peers.
|
||||
NvBool connectedToSwitch;
|
||||
NvU64 nvswitchMemoryWindowStart;
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@@ -683,6 +725,9 @@ typedef struct UvmPmaStatistics_tag
|
||||
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
|
||||
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
|
||||
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
|
||||
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
|
||||
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
|
||||
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
|
||||
} UvmPmaStatistics;
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -790,24 +835,92 @@ struct UvmOpsUvmEvents
|
||||
#endif
|
||||
};
|
||||
|
||||
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
|
||||
#define UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES 16
|
||||
|
||||
typedef union UvmFaultMetadataPacket_tag
|
||||
{
|
||||
struct {
|
||||
NvU8 authTag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES];
|
||||
NvBool valid;
|
||||
};
|
||||
// padding to 32Bytes
|
||||
NvU8 _padding[32];
|
||||
} UvmFaultMetadataPacket;
|
||||
|
||||
// This struct shall not be accessed nor modified directly by UVM as it is
|
||||
// entirely managed by the RM layer
|
||||
typedef struct UvmCslContext_tag
|
||||
{
|
||||
struct ccslContext_t *ctx;
|
||||
void *nvidia_stack;
|
||||
} UvmCslContext;
|
||||
|
||||
typedef struct UvmGpuFaultInfo_tag
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Register mappings obtained from RM
|
||||
// Fault buffer GET register mapping.
|
||||
//
|
||||
// When Confidential Computing is enabled, GET refers to the shadow
|
||||
// buffer (see bufferAddress below), and not to the actual HW buffer.
|
||||
// In this setup, writes of GET (by UVM) do not result on re-evaluation
|
||||
// of any interrupt condition.
|
||||
volatile NvU32* pFaultBufferGet;
|
||||
|
||||
// Fault buffer PUT register mapping.
|
||||
//
|
||||
// When Confidential Computing is enabled, PUT refers to the shadow
|
||||
// buffer (see bufferAddress below), and not to the actual HW buffer.
|
||||
// In this setup, writes of PUT (by GSP-RM) do not result on
|
||||
// re-evaluation of any interrupt condition.
|
||||
volatile NvU32* pFaultBufferPut;
|
||||
// Note: this variable is deprecated since buffer overflow is not a separate
|
||||
// register from future chips.
|
||||
|
||||
// Note: this variable is deprecated since buffer overflow is not a
|
||||
// separate register from future chips.
|
||||
volatile NvU32* pFaultBufferInfo;
|
||||
|
||||
// Register mapping used to clear a replayable fault interrupt in
|
||||
// Turing+ GPUs.
|
||||
volatile NvU32* pPmcIntr;
|
||||
|
||||
// Register mapping used to enable replayable fault interrupts.
|
||||
volatile NvU32* pPmcIntrEnSet;
|
||||
|
||||
// Register mapping used to disable replayable fault interrupts.
|
||||
volatile NvU32* pPmcIntrEnClear;
|
||||
|
||||
// Register used to enable, or disable, faults on prefetches.
|
||||
volatile NvU32* pPrefetchCtrl;
|
||||
|
||||
// Replayable fault interrupt mask identifier.
|
||||
NvU32 replayableFaultMask;
|
||||
// fault buffer cpu mapping and size
|
||||
void* bufferAddress;
|
||||
|
||||
// Fault buffer CPU mapping
|
||||
void* bufferAddress;
|
||||
//
|
||||
// When Confidential Computing is disabled, the mapping points to the
|
||||
// actual HW fault buffer.
|
||||
//
|
||||
// When Confidential Computing is enabled, the mapping points to a
|
||||
// copy of the HW fault buffer. This "shadow buffer" is maintained
|
||||
// by GSP-RM.
|
||||
|
||||
// Size, in bytes, of the fault buffer pointed by bufferAddress.
|
||||
NvU32 bufferSize;
|
||||
// Mapping pointing to the start of the fault buffer metadata containing
|
||||
// a 16Byte authentication tag and a valid byte. Always NULL when
|
||||
// Confidential Computing is disabled.
|
||||
UvmFaultMetadataPacket *bufferMetadata;
|
||||
|
||||
// CSL context used for performing decryption of replayable faults when
|
||||
// Confidential Computing is enabled.
|
||||
UvmCslContext cslCtx;
|
||||
|
||||
// Indicates whether UVM owns the replayable fault buffer.
|
||||
// The value of this field is always NV_TRUE When Confidential Computing
|
||||
// is disabled.
|
||||
NvBool bUvmOwnsHwFaultBuffer;
|
||||
} replayable;
|
||||
struct
|
||||
{
|
||||
@@ -826,10 +939,24 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
|
||||
// Preallocated stack for functions called from the UVM isr bottom half
|
||||
void *isr_bh_sp;
|
||||
|
||||
// Used only when Hopper Confidential Compute is enabled
|
||||
// Register mappings obtained from RM
|
||||
volatile NvU32* pFaultBufferPut;
|
||||
|
||||
// Used only when Hopper Confidential Compute is enabled
|
||||
// Cached get index of the non-replayable shadow buffer
|
||||
NvU32 shadowBufferGet;
|
||||
|
||||
// See replayable.bufferMetadata
|
||||
UvmFaultMetadataPacket *shadowBufferMetadata;
|
||||
} nonReplayable;
|
||||
NvHandle faultBufferHandle;
|
||||
struct Device *pDevice;
|
||||
} UvmGpuFaultInfo;
|
||||
|
||||
struct Device;
|
||||
|
||||
typedef struct UvmGpuPagingChannel_tag
|
||||
{
|
||||
struct gpuDevice *device;
|
||||
@@ -837,6 +964,7 @@ typedef struct UvmGpuPagingChannel_tag
|
||||
NvHandle channelHandle;
|
||||
NvHandle errorNotifierHandle;
|
||||
void *pushStreamSp;
|
||||
struct Device *pDevice;
|
||||
} UvmGpuPagingChannel, *UvmGpuPagingChannelHandle;
|
||||
|
||||
typedef struct UvmGpuAccessCntrInfo_tag
|
||||
@@ -860,12 +988,6 @@ typedef struct UvmGpuAccessCntrInfo_tag
|
||||
void* bufferAddress;
|
||||
NvU32 bufferSize;
|
||||
NvHandle accessCntrBufferHandle;
|
||||
|
||||
// The Notification address in the access counter notification msg does not
|
||||
// contain the correct upper bits 63-47 for GPA-based notifications. RM
|
||||
// provides us with the correct offset to be added.
|
||||
// See Bug 1803015
|
||||
NvU64 baseDmaSysmemAddr;
|
||||
} UvmGpuAccessCntrInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -897,7 +1019,18 @@ typedef struct UvmGpuAccessCntrConfig_tag
|
||||
NvU32 threshold;
|
||||
} UvmGpuAccessCntrConfig;
|
||||
|
||||
//
|
||||
// When modifying this enum, make sure they are compatible with the mirrored
|
||||
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
|
||||
//
|
||||
typedef enum UvmPmaGpuMemoryType_tag
|
||||
{
|
||||
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
|
||||
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
|
||||
} UVM_PMA_GPU_MEMORY_TYPE;
|
||||
|
||||
typedef UvmGpuChannelInfo gpuChannelInfo;
|
||||
typedef UvmGpuTsgAllocParams gpuTsgAllocParams;
|
||||
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
|
||||
typedef UvmGpuCaps gpuCaps;
|
||||
typedef UvmGpuCopyEngineCaps gpuCeCaps;
|
||||
@@ -922,4 +1055,16 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
|
||||
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
|
||||
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
|
||||
|
||||
typedef struct UvmCslIv
|
||||
{
|
||||
NvU8 iv[12];
|
||||
NvU8 fresh;
|
||||
} UvmCslIv;
|
||||
|
||||
typedef enum UvmCslOperation
|
||||
{
|
||||
UVM_CSL_OPERATION_ENCRYPT,
|
||||
UVM_CSL_OPERATION_DECRYPT
|
||||
} UvmCslOperation;
|
||||
|
||||
#endif // _NV_UVM_TYPES_H_
|
||||
|
||||
@@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|
||||
|* *|
|
||||
\***************************************************************************/
|
||||
|
||||
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
|
||||
typedef NvU64 NvOffset; /* GPU address */
|
||||
#endif
|
||||
|
||||
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
|
||||
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <nvlimits.h>
|
||||
|
||||
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
|
||||
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
|
||||
|
||||
#define NVKMS_LEFT 0
|
||||
#define NVKMS_RIGHT 1
|
||||
@@ -530,4 +531,78 @@ typedef struct {
|
||||
NvBool noncoherent;
|
||||
} NvKmsDispIOCoherencyModes;
|
||||
|
||||
enum NvKmsInputColorSpace {
|
||||
/* Unknown colorspace; no de-gamma will be applied */
|
||||
NVKMS_INPUT_COLORSPACE_NONE = 0,
|
||||
|
||||
/* Linear, Rec.709 [-0.5, 7.5) */
|
||||
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
|
||||
|
||||
/* PQ, Rec.2020 unity */
|
||||
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
|
||||
};
|
||||
|
||||
enum NvKmsOutputTf {
|
||||
/*
|
||||
* NVKMS itself won't apply any OETF (clients are still
|
||||
* free to provide a custom OLUT)
|
||||
*/
|
||||
NVKMS_OUTPUT_TF_NONE = 0,
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
|
||||
NVKMS_OUTPUT_TF_PQ = 2,
|
||||
};
|
||||
|
||||
/*!
|
||||
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
|
||||
* This is expected to match exactly with the spec.
|
||||
*/
|
||||
struct NvKmsHDRStaticMetadata {
|
||||
/*!
|
||||
* Color primaries of the data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} displayPrimaries[3];
|
||||
|
||||
/*!
|
||||
* White point of colorspace data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} whitePoint;
|
||||
|
||||
/**
|
||||
* Maximum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Minimum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of
|
||||
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
|
||||
* represents 6.5535 cd/m2.
|
||||
*/
|
||||
NvU16 minDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Maximum content light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxCLL;
|
||||
|
||||
/*!
|
||||
* Maximum frame-average light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxFALL;
|
||||
};
|
||||
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
||||
@@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
|
||||
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
|
||||
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
|
||||
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
|
||||
};
|
||||
|
||||
typedef struct NvKmsSurfaceMemoryFormatInfo {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
} caps;
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@@ -164,8 +165,6 @@ struct NvKmsKapiConnectorInfo {
|
||||
|
||||
NvU32 physicalIndex;
|
||||
|
||||
NvU32 headMask;
|
||||
|
||||
NvKmsConnectorSignalFormat signalFormat;
|
||||
NvKmsConnectorType type;
|
||||
|
||||
@@ -193,6 +192,7 @@ struct NvKmsKapiStaticDisplayInfo {
|
||||
NvU32 numPossibleClones;
|
||||
NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
NvU32 headMask;
|
||||
};
|
||||
|
||||
struct NvKmsKapiSyncpt {
|
||||
@@ -218,6 +218,11 @@ struct NvKmsKapiLayerConfig {
|
||||
struct NvKmsRRParams rrParams;
|
||||
struct NvKmsKapiSyncpt syncptParams;
|
||||
|
||||
struct NvKmsHDRStaticMetadata hdrMetadata;
|
||||
NvBool hdrMetadataSpecified;
|
||||
|
||||
enum NvKmsOutputTf tf;
|
||||
|
||||
NvU8 minPresentInterval;
|
||||
NvBool tearing;
|
||||
|
||||
@@ -226,6 +231,8 @@ struct NvKmsKapiLayerConfig {
|
||||
|
||||
NvS16 dstX, dstY;
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
@@ -277,6 +284,8 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
struct NvKmsKapiDisplayMode mode;
|
||||
|
||||
NvBool vrrEnabled;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
@@ -368,6 +377,9 @@ struct NvKmsKapiDynamicDisplayParams {
|
||||
/* [OUT] Connection status */
|
||||
NvU32 connected;
|
||||
|
||||
/* [OUT] VRR status */
|
||||
NvBool vrrSupported;
|
||||
|
||||
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
|
||||
struct {
|
||||
NvU16 bufferSize;
|
||||
@@ -484,6 +496,47 @@ struct NvKmsKapiFunctionsTable {
|
||||
*/
|
||||
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] fd fd from opening /dev/nvidia-modeset.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to grant.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*grantPermissions)
|
||||
(
|
||||
NvS32 fd,
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Revoke permissions previously granted. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to revoke.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*revokePermissions)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Registers for notification, via
|
||||
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified
|
||||
@@ -1020,6 +1073,21 @@ struct NvKmsKapiFunctionsTable {
|
||||
NvU64 *pPages
|
||||
);
|
||||
|
||||
/*!
|
||||
* Check if this memory object can be scanned out for display.
|
||||
*
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] memory The memory object to check for display support.
|
||||
*
|
||||
* \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
|
||||
*/
|
||||
NvBool (*isMemoryValidForDisplay)
|
||||
(
|
||||
const struct NvKmsKapiDevice *device,
|
||||
const struct NvKmsKapiMemory *memory
|
||||
);
|
||||
|
||||
/*
|
||||
* Import SGT as a memory handle.
|
||||
*
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
//
|
||||
// This file was generated with FINN, an NVIDIA coding tool.
|
||||
// Source file: nvlimits.finn
|
||||
// Source file: nvlimits.finn
|
||||
//
|
||||
|
||||
|
||||
|
||||
@@ -234,12 +234,14 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
|
||||
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
|
||||
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
|
||||
#else
|
||||
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
|
||||
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
|
||||
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
|
||||
#endif
|
||||
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
@@ -249,12 +251,12 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (1?drf) // much better
|
||||
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
|
||||
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
|
||||
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#endif
|
||||
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
|
||||
#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
|
||||
#endif
|
||||
@@ -907,6 +909,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
return uAddr.p;
|
||||
}
|
||||
|
||||
// Get bit at pos (k) from x
|
||||
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
|
||||
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
|
||||
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
|
||||
//
|
||||
// Set the bit at pos (b) for U64 which is < 128. Since the (b) can be >= 64, we need 2 U64 to store this.
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUS_H
|
||||
#define SDK_NVSTATUS_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUS_H */
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUSCODES_H
|
||||
#define SDK_NVSTATUSCODES_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
|
||||
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
|
||||
|
||||
@@ -153,6 +148,8 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
@@ -164,6 +161,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
|
||||
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
|
||||
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUSCODES_H */
|
||||
|
||||
@@ -513,6 +513,12 @@ typedef struct
|
||||
// place to re-locate these from nvos.h which cannot be included by a number
|
||||
// of builds that need them
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
|
||||
#define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
|
||||
#else
|
||||
#define NV_ATTRIBUTE_UNUSED
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#if _MSC_VER >= 1310
|
||||
@@ -536,8 +542,6 @@ typedef struct
|
||||
|
||||
#define NV_FORCERESULTCHECK
|
||||
|
||||
#define NV_ATTRIBUTE_UNUSED
|
||||
|
||||
#define NV_FORMAT_PRINTF(_f, _a)
|
||||
|
||||
#else // ! defined(_MSC_VER)
|
||||
@@ -635,12 +639,6 @@ typedef struct
|
||||
#define NV_FORCERESULTCHECK
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
|
||||
#define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
|
||||
#else
|
||||
#define NV_ATTRIBUTE_UNUSED
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Functions decorated with NV_FORMAT_PRINTF(f, a) have a format string at
|
||||
* parameter number 'f' and variadic arguments start at parameter number 'a'.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -143,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_release_semaphore (void *);
|
||||
void* NV_API_CALL os_alloc_rwlock (void);
|
||||
void NV_API_CALL os_free_rwlock (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
|
||||
void NV_API_CALL os_release_rwlock_read (void *);
|
||||
void NV_API_CALL os_release_rwlock_write (void *);
|
||||
NvBool NV_API_CALL os_semaphore_may_sleep (void);
|
||||
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
|
||||
NvBool NV_API_CALL os_is_isr (void);
|
||||
@@ -173,7 +181,6 @@ NV_STATUS NV_API_CALL os_put_page (NvU64 address);
|
||||
NvU32 NV_API_CALL os_get_page_refcount (NvU64 address);
|
||||
NvU32 NV_API_CALL os_count_tail_pages (NvU64 address);
|
||||
void NV_API_CALL os_free_pages_phys (NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_call_nv_vmbus (NvU32, void *);
|
||||
NV_STATUS NV_API_CALL os_open_temporary_file (void **);
|
||||
void NV_API_CALL os_close_file (void *);
|
||||
NV_STATUS NV_API_CALL os_write_file (void *, NvU8 *, NvU64, NvU64);
|
||||
@@ -181,7 +188,7 @@ NV_STATUS NV_API_CALL os_read_file (void *, NvU8 *, NvU64, NvU
|
||||
NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void **);
|
||||
NV_STATUS NV_API_CALL os_open_and_read_file (const char *, NvU8 *, NvU64);
|
||||
NvBool NV_API_CALL os_is_nvswitch_present (void);
|
||||
void NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL os_alloc_wait_queue (os_wait_queue **);
|
||||
void NV_API_CALL os_free_wait_queue (os_wait_queue *);
|
||||
void NV_API_CALL os_wait_uninterruptible (os_wait_queue *);
|
||||
@@ -200,12 +207,15 @@ enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
extern NvU8 os_page_shift;
|
||||
extern NvU32 os_sev_status;
|
||||
extern NvBool os_sev_enabled;
|
||||
extern NvBool os_cc_enabled;
|
||||
extern NvBool os_cc_tdx_enabled;
|
||||
extern NvBool os_dma_buf_enabled;
|
||||
|
||||
/*
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -56,7 +56,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_get_p2p_caps(nvidia_stack_t *, nvgpuDeviceHan
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_cpu_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, NvLength, void **, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_cpu_ummap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, void*);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_channel_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_tsg_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuTsgAllocParams_t *, nvgpuTsgHandle_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_tsg_destroy(nvidia_stack_t *, nvgpuTsgHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_channel_allocate(nvidia_stack_t *, const nvgpuTsgHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_channel_destroy(nvidia_stack_t *, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_free(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuCaps_t);
|
||||
@@ -74,8 +76,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
|
||||
@@ -98,4 +101,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
|
||||
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,7 @@
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-utils.h"
|
||||
#include "nvidia-drm-encoder.h"
|
||||
|
||||
@@ -42,6 +43,7 @@
|
||||
|
||||
#include <drm/drm_atomic.h>
|
||||
#include <drm/drm_atomic_helper.h>
|
||||
#include <drm/drm_edid.h>
|
||||
|
||||
static void nv_drm_connector_destroy(struct drm_connector *connector)
|
||||
{
|
||||
@@ -98,7 +100,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
break;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_OVERRIDE_EDID)
|
||||
if (connector->override_edid) {
|
||||
#else
|
||||
if (drm_edid_override_connector_update(connector) > 0) {
|
||||
#endif
|
||||
const struct drm_property_blob *edid = connector->edid_blob_ptr;
|
||||
|
||||
if (edid->length <= sizeof(pDetectParams->edid.buffer)) {
|
||||
@@ -118,6 +124,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
|
||||
drm_connector_attach_vrr_capable_property(&nv_connector->base);
|
||||
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
|
||||
#endif
|
||||
|
||||
if (pDetectParams->connected) {
|
||||
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {
|
||||
|
||||
@@ -197,6 +208,11 @@ done:
|
||||
|
||||
nv_drm_free(pDetectParams);
|
||||
|
||||
if (status == connector_status_disconnected &&
|
||||
nv_connector->modeset_permission_filep) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -362,6 +378,8 @@ nv_drm_connector_new(struct drm_device *dev,
|
||||
nv_connector->physicalIndex = physicalIndex;
|
||||
nv_connector->type = type;
|
||||
nv_connector->internal = internal;
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
|
||||
strcpy(nv_connector->dpAddress, dpAddress);
|
||||
|
||||
@@ -464,4 +482,26 @@ done:
|
||||
return connector;
|
||||
}
|
||||
|
||||
/*
|
||||
* Revoke the permissions on this connector.
|
||||
*/
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector* nv_connector)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
bool ret = true;
|
||||
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -51,6 +51,20 @@ struct nv_drm_connector {
|
||||
|
||||
atomic_t connection_status_dirty;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
/**
|
||||
* @modeset_permission_crtc:
|
||||
*
|
||||
* The crtc using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct nv_drm_crtc *modeset_permission_crtc;
|
||||
|
||||
struct drm_connector base;
|
||||
};
|
||||
|
||||
@@ -84,6 +98,9 @@ nv_drm_get_connector(struct drm_device *dev,
|
||||
NvBool internal,
|
||||
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH]);
|
||||
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector *nv_connector);
|
||||
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#endif /* __NVIDIA_DRM_CONNECTOR_H__ */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -44,6 +44,37 @@
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#include <linux/nvhost.h>
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
#include <linux/host1x-next.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
static int
|
||||
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
|
||||
struct drm_property_blob **blob,
|
||||
uint64_t blob_id,
|
||||
ssize_t expected_size)
|
||||
{
|
||||
struct drm_property_blob *new_blob = NULL;
|
||||
|
||||
if (blob_id != 0) {
|
||||
new_blob = drm_property_lookup_blob(dev, blob_id);
|
||||
if (new_blob == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((expected_size > 0) &&
|
||||
(new_blob->length != expected_size)) {
|
||||
drm_property_blob_put(new_blob);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
drm_property_replace_blob(blob, new_blob);
|
||||
drm_property_blob_put(new_blob);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nv_drm_plane_destroy(struct drm_plane *plane)
|
||||
@@ -84,9 +115,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
cursor_req_config_disable(req_config);
|
||||
@@ -186,7 +214,6 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
int ret = 0;
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
plane_req_config_disable(req_config);
|
||||
@@ -309,6 +336,9 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
nv_plane->defaultCompositionMode;
|
||||
#endif
|
||||
|
||||
req_config->config.inputColorSpace =
|
||||
nv_drm_plane_state->input_colorspace;
|
||||
|
||||
req_config->config.syncptParams.preSyncptSpecified = false;
|
||||
req_config->config.syncptParams.postSyncptRequested = false;
|
||||
|
||||
@@ -320,10 +350,10 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
int ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -333,12 +363,81 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
req_config->config.syncptParams.postSyncptRequested = true;
|
||||
}
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
int ret = host1x_fence_extract(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
req_config->config.syncptParams.preSyncptSpecified = true;
|
||||
}
|
||||
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
req_config->config.syncptParams.postSyncptRequested = true;
|
||||
}
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
|
||||
struct hdr_output_metadata *hdr_metadata =
|
||||
nv_drm_plane_state->hdr_output_metadata->data;
|
||||
struct hdr_metadata_infoframe *info_frame =
|
||||
&hdr_metadata->hdmi_metadata_type1;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
uint32_t i;
|
||||
|
||||
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].x =
|
||||
info_frame->display_primaries[i].x;
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].y =
|
||||
info_frame->display_primaries[i].y;
|
||||
}
|
||||
|
||||
req_config->config.hdrMetadata.whitePoint.x =
|
||||
info_frame->white_point.x;
|
||||
req_config->config.hdrMetadata.whitePoint.y =
|
||||
info_frame->white_point.y;
|
||||
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
|
||||
info_frame->max_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
|
||||
info_frame->min_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.maxCLL =
|
||||
info_frame->max_cll;
|
||||
req_config->config.hdrMetadata.maxFALL =
|
||||
info_frame->max_fall;
|
||||
|
||||
req_config->config.hdrMetadataSpecified = true;
|
||||
|
||||
switch (info_frame->eotf) {
|
||||
case HDMI_EOTF_SMPTE_ST2084:
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
|
||||
break;
|
||||
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
|
||||
req_config->config.tf =
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
|
||||
break;
|
||||
default:
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
req_config->config.hdrMetadataSpecified = false;
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Unconditionally mark the surface as changed, even if nothing changed,
|
||||
* so that we always get a flip event: a DRM client may flip with
|
||||
@@ -509,9 +608,21 @@ static int nv_drm_plane_atomic_set_property(
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
return nv_drm_atomic_replace_property_blob_from_id(
|
||||
nv_dev->dev,
|
||||
&nv_drm_plane_state->hdr_output_metadata,
|
||||
val,
|
||||
sizeof(struct hdr_output_metadata));
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int nv_drm_plane_atomic_get_property(
|
||||
@@ -521,12 +632,26 @@ static int nv_drm_plane_atomic_get_property(
|
||||
uint64_t *val)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
*val = nv_drm_plane_state->input_colorspace;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
*val = nv_drm_plane_state->hdr_output_metadata ?
|
||||
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct drm_plane_state *
|
||||
@@ -544,6 +669,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
|
||||
|
||||
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
|
||||
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
|
||||
if (nv_plane_state->hdr_output_metadata) {
|
||||
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
|
||||
}
|
||||
#endif
|
||||
|
||||
return &nv_plane_state->base;
|
||||
}
|
||||
@@ -557,6 +690,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
#else
|
||||
__drm_atomic_helper_plane_destroy_state(state);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(state);
|
||||
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void nv_drm_plane_atomic_destroy_state(
|
||||
@@ -803,7 +942,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
|
||||
};
|
||||
|
||||
static void nv_drm_plane_install_properties(
|
||||
struct drm_plane *plane)
|
||||
struct drm_plane *plane,
|
||||
NvBool supportsHDR)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
|
||||
@@ -811,6 +951,19 @@ static void nv_drm_plane_install_properties(
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_out_fence_property, 0);
|
||||
}
|
||||
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -990,7 +1143,9 @@ nv_drm_plane_create(struct drm_device *dev,
|
||||
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
|
||||
|
||||
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
|
||||
nv_drm_plane_install_properties(plane);
|
||||
nv_drm_plane_install_properties(
|
||||
plane,
|
||||
pResInfo->supportsHDR[layer_idx]);
|
||||
}
|
||||
|
||||
__nv_drm_plane_create_alpha_blending_properties(
|
||||
@@ -1043,6 +1198,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
|
||||
nv_crtc->head = head;
|
||||
INIT_LIST_HEAD(&nv_crtc->flip_list);
|
||||
spin_lock_init(&nv_crtc->flip_list_lock);
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
|
||||
ret = drm_crtc_init_with_planes(nv_dev->dev,
|
||||
&nv_crtc->base,
|
||||
@@ -1141,11 +1297,13 @@ void nv_drm_enumerate_crtcs_and_planes(
|
||||
}
|
||||
|
||||
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
|
||||
struct drm_plane *overlay_plane = NULL;
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
|
||||
continue;
|
||||
}
|
||||
|
||||
struct drm_plane *overlay_plane =
|
||||
overlay_plane =
|
||||
nv_drm_plane_create(nv_dev->dev,
|
||||
DRM_PLANE_TYPE_OVERLAY,
|
||||
layer,
|
||||
@@ -1189,7 +1347,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1217,7 +1375,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -35,38 +35,9 @@
|
||||
|
||||
#include <drm/drm_crtc.h>
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvkms-kapi.h"
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_MODE_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
struct nv_drm_crtc {
|
||||
NvU32 head;
|
||||
|
||||
@@ -85,6 +56,13 @@ struct nv_drm_crtc {
|
||||
*/
|
||||
spinlock_t flip_list_lock;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this crtc with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
struct drm_crtc base;
|
||||
};
|
||||
|
||||
@@ -205,6 +183,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
enum NvKmsInputColorSpace input_colorspace;
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
@@ -212,6 +194,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
|
||||
return container_of(state, struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
|
||||
{
|
||||
return container_of(state, const struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
|
||||
{
|
||||
if (crtc == NULL) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -30,7 +30,7 @@
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@@ -86,6 +86,23 @@
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
|
||||
enum NvKmsInputColorSpace colorSpace)
|
||||
{
|
||||
switch (colorSpace) {
|
||||
case NVKMS_INPUT_COLORSPACE_NONE:
|
||||
return "None";
|
||||
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
|
||||
return "IEC 61966-2-2 linear FP";
|
||||
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
|
||||
return "ITU-R BT.2100-PQ YCbCr";
|
||||
default:
|
||||
/* We shoudn't hit this */
|
||||
WARN_ON("Unsupported input colorspace");
|
||||
return "None";
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
@@ -240,10 +257,6 @@ nv_drm_init_mode_config(struct nv_drm_device *nv_dev,
|
||||
dev->mode_config.preferred_depth = 24;
|
||||
dev->mode_config.prefer_shadow = 1;
|
||||
|
||||
/* Currently unused. Update when needed. */
|
||||
|
||||
dev->mode_config.fb_base = 0;
|
||||
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_ASYNC_FLIP) || \
|
||||
defined(NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS)
|
||||
dev->mode_config.async_page_flip = true;
|
||||
@@ -332,6 +345,15 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
@@ -345,6 +367,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_dev->nv_hdr_output_metadata_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_HDR_STATIC_METADATA", 0);
|
||||
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -667,6 +706,16 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
/* check the pDevice since this only gets set if modeset = 1
|
||||
* which is a requirement for the dma_buf extension to work
|
||||
*/
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
return nv_dev->pDevice ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static
|
||||
int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
@@ -696,6 +745,455 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
|
||||
NvU32 dpyId)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
return nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->hDisplay == dpyId;
|
||||
}
|
||||
|
||||
static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
|
||||
// Importantly, drm_connector_lookup (with filep) will only return the
|
||||
// connector if we are master, a lessee with the connector, or not master at
|
||||
// all. It will return NULL if we are a lessee with other connectors.
|
||||
struct drm_connector *connector =
|
||||
nv_drm_connector_lookup(dev, filep, params->connectorId);
|
||||
struct nv_drm_connector *nv_connector;
|
||||
int ret = 0;
|
||||
|
||||
if (!connector) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nv_connector = to_nv_connector(connector);
|
||||
if (!nv_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nv_connector->nv_detected_encoder) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
|
||||
|
||||
done:
|
||||
nv_drm_connector_put(connector);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
|
||||
struct drm_connector *connector;
|
||||
int ret = -EINVAL;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
|
||||
/* Lookup for existing connector with same dpyId */
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
params->connectorId = connector->base.id;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static NvU32 nv_drm_get_head_bit_from_connector(struct drm_connector *connector)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
|
||||
if (connector->state && connector->state->crtc) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(connector->state->crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
} else if (nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->base.crtc) {
|
||||
struct nv_drm_crtc *nv_crtc =
|
||||
to_nv_crtc(nv_connector->nv_detected_encoder->base.crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_grant_permissions_params *params = data;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct nv_drm_connector *target_nv_connector = NULL;
|
||||
struct nv_drm_crtc *target_nv_crtc = NULL;
|
||||
struct drm_connector *connector, *target_connector = NULL;
|
||||
struct drm_crtc *crtc;
|
||||
NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/* Get the connector for the dpyId. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
target_connector =
|
||||
nv_drm_connector_lookup(dev, filep, connector->base.id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
// Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
|
||||
// return the object if we are master, a lessee with the object, or not
|
||||
// master at all. It will return NULL if we are a lessee with other objects.
|
||||
if (!target_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
target_nv_connector = to_nv_connector(target_connector);
|
||||
possible_crtcs =
|
||||
target_nv_connector->nv_detected_encoder->base.possible_crtcs;
|
||||
|
||||
/* Target connector must not be previously granted. */
|
||||
if (target_nv_connector->modeset_permission_filep) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Add all heads that are owned and not already granted. */
|
||||
freeHeadBits = 0;
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
|
||||
!nv_crtc->modeset_permission_filep &&
|
||||
(drm_crtc_mask(crtc) & possible_crtcs)) {
|
||||
freeHeadBits |= NVBIT(nv_crtc->head);
|
||||
}
|
||||
}
|
||||
|
||||
targetHeadBit = nv_drm_get_head_bit_from_connector(target_connector);
|
||||
if (targetHeadBit & freeHeadBits) {
|
||||
/* If a crtc is already being used by this connector, use it. */
|
||||
freeHeadBits = targetHeadBit;
|
||||
} else {
|
||||
/* Otherwise, remove heads that are in use by other connectors. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Fail if no heads are available. */
|
||||
if (!freeHeadBits) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop through the crtc again and find a matching head.
|
||||
* Record the filep that is using the crtc and the connector.
|
||||
*/
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (freeHeadBits & NVBIT(nv_crtc->head)) {
|
||||
target_nv_crtc = nv_crtc;
|
||||
head = nv_crtc->head;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nvKms->grantPermissions(params->fd, nv_dev->pDevice, head,
|
||||
params->dpyId)) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
target_nv_connector->modeset_permission_crtc = target_nv_crtc;
|
||||
target_nv_connector->modeset_permission_filep = filep;
|
||||
target_nv_crtc->modeset_permission_filep = filep;
|
||||
|
||||
done:
|
||||
if (target_connector) {
|
||||
nv_drm_connector_put(target_connector);
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool nv_drm_revoke_connector(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_connector *nv_connector)
|
||||
{
|
||||
bool ret = true;
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission(struct drm_device *dev,
|
||||
struct drm_file *filep, NvU32 dpyId)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If dpyId is set, only revoke those specific resources. Otherwise,
|
||||
* it is from closing the file so revoke all resources for that filep.
|
||||
*/
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep == filep &&
|
||||
(!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
|
||||
if (!nv_drm_connector_revoke_permissions(dev, nv_connector)) {
|
||||
ret = -EINVAL;
|
||||
// Continue trying to revoke as much as possible.
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep == filep && !dpyId) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_revoke_permissions_params *params = data;
|
||||
if (!params->dpyId) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return nv_drm_revoke_permission(dev, filep, params->dpyId);
|
||||
}
|
||||
|
||||
static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
|
||||
{
|
||||
/*
|
||||
* Some systems like android can reach here without initializing the
|
||||
* device, so check for that.
|
||||
*/
|
||||
if (dev->mode_config.num_crtc > 0 &&
|
||||
dev->mode_config.crtc_list.next != NULL &&
|
||||
dev->mode_config.crtc_list.prev != NULL &&
|
||||
dev->mode_config.num_connector > 0 &&
|
||||
dev->mode_config.connector_list.next != NULL &&
|
||||
dev->mode_config.connector_list.prev != NULL) {
|
||||
nv_drm_revoke_permission(dev, filep, 0);
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
|
||||
int lessee_id)
|
||||
{
|
||||
int object;
|
||||
void *entry;
|
||||
|
||||
while (master->lessor != NULL) {
|
||||
master = master->lessor;
|
||||
}
|
||||
|
||||
idr_for_each_entry(&master->lessee_idr, entry, object)
|
||||
{
|
||||
if (object == lessee_id) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void nv_drm_get_revoked_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, unsigned int cmd,
|
||||
unsigned long arg, int **objects,
|
||||
int *objects_count)
|
||||
{
|
||||
unsigned int ioc_size;
|
||||
struct drm_mode_revoke_lease revoke_lease;
|
||||
struct drm_master *lessor, *lessee;
|
||||
void *entry;
|
||||
int *objs;
|
||||
int obj, obj_count, obj_i;
|
||||
|
||||
ioc_size = _IOC_SIZE(cmd);
|
||||
if (ioc_size > sizeof(revoke_lease)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (copy_from_user(&revoke_lease, (void __user *)arg, ioc_size) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
lessor = nv_drm_file_get_master(filep);
|
||||
if (lessor == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->mode_config.idr_mutex);
|
||||
lessee = nv_drm_find_lessee(lessor, revoke_lease.lessee_id);
|
||||
|
||||
if (lessee == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_count = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
++obj_count;
|
||||
}
|
||||
if (obj_count == 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
objs = nv_drm_calloc(obj_count, sizeof(int));
|
||||
if (objs == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_i = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
objs[obj_i++] = obj;
|
||||
}
|
||||
*objects = objs;
|
||||
*objects_count = obj_count;
|
||||
|
||||
done:
|
||||
mutex_unlock(&dev->mode_config.idr_mutex);
|
||||
drm_master_put(&lessor);
|
||||
}
|
||||
|
||||
static bool nv_drm_is_in_objects(int object, int *objects, int objects_count)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < objects_count; ++i) {
|
||||
if (objects[i] == object) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void nv_drm_finish_revoking_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, int *objects,
|
||||
int objects_count)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
int ret = 0;
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(crtc->base.id, objects, objects_count)) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
}
|
||||
#endif /* NV_DRM_MASTER_HAS_LEASES */
|
||||
|
||||
#if defined(NV_DRM_BUS_PRESENT)
|
||||
|
||||
#if defined(NV_DRM_BUS_HAS_GET_IRQ)
|
||||
@@ -727,12 +1225,50 @@ static struct drm_bus nv_drm_bus = {
|
||||
|
||||
#endif /* NV_DRM_BUS_PRESENT */
|
||||
|
||||
/*
|
||||
* Wrapper around drm_ioctl to hook in to upstream ioctl.
|
||||
*
|
||||
* Currently used to add additional handling to REVOKE_LEASE.
|
||||
*/
|
||||
static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
long retcode;
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
struct drm_file *file_priv = filp->private_data;
|
||||
struct drm_device *dev = file_priv->minor->dev;
|
||||
int *objects = NULL;
|
||||
int objects_count = 0;
|
||||
|
||||
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE) {
|
||||
// Save the revoked objects before revoking.
|
||||
nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
|
||||
&objects_count);
|
||||
}
|
||||
#endif
|
||||
|
||||
retcode = drm_ioctl(filp, cmd, arg);
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
|
||||
if (retcode == 0) {
|
||||
// If revoking was successful, finish revoking the objects.
|
||||
nv_drm_finish_revoking_objects(dev, file_priv, objects,
|
||||
objects_count);
|
||||
}
|
||||
nv_drm_free(objects);
|
||||
}
|
||||
#endif
|
||||
|
||||
return retcode;
|
||||
}
|
||||
|
||||
static const struct file_operations nv_drm_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
|
||||
.open = drm_open,
|
||||
.release = drm_release,
|
||||
.unlocked_ioctl = drm_ioctl,
|
||||
.unlocked_ioctl = nv_drm_ioctl,
|
||||
#if defined(CONFIG_COMPAT)
|
||||
.compat_ioctl = drm_compat_ioctl,
|
||||
#endif
|
||||
@@ -768,11 +1304,11 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
|
||||
nv_drm_fence_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_fence_context_create_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_PRIME_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_prime_fence_context_create_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_FENCE_ATTACH,
|
||||
nv_drm_gem_fence_attach_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_PRIME_FENCE_ATTACH,
|
||||
nv_drm_gem_prime_fence_attach_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
#endif
|
||||
|
||||
@@ -798,6 +1334,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IDENTIFY_OBJECT,
|
||||
nv_drm_gem_identify_object_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_DMABUF_SUPPORTED,
|
||||
nv_drm_dmabuf_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID,
|
||||
nv_drm_get_dpy_id_for_connector_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID,
|
||||
nv_drm_get_connector_id_for_dpy_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GRANT_PERMISSIONS,
|
||||
nv_drm_grant_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
|
||||
nv_drm_revoke_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
};
|
||||
|
||||
@@ -816,8 +1367,23 @@ static struct drm_driver nv_drm_driver = {
|
||||
.ioctls = nv_drm_ioctls,
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
|
||||
* conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
*
|
||||
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made these helpers the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
|
||||
* them if the helpers aren't present.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
#endif
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_fd_to_handle
|
||||
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
|
||||
#endif
|
||||
|
||||
.gem_prime_import = nv_drm_gem_prime_import,
|
||||
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
|
||||
|
||||
@@ -840,6 +1406,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
|
||||
.load = nv_drm_load,
|
||||
.unload = nv_drm_unload,
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
.postclose = nv_drm_postclose,
|
||||
#endif
|
||||
|
||||
.fops = &nv_drm_fops,
|
||||
|
||||
@@ -882,7 +1451,9 @@ static void nv_drm_update_drm_driver_features(void)
|
||||
|
||||
nv_drm_driver.dumb_create = nv_drm_dumb_create;
|
||||
nv_drm_driver.dumb_map_offset = nv_drm_dumb_map_offset;
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
nv_drm_driver.dumb_destroy = nv_drm_dumb_destroy;
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
}
|
||||
|
||||
|
||||
@@ -205,7 +205,7 @@ nv_drm_add_encoder(struct drm_device *dev, NvKmsKapiDisplay hDisplay)
|
||||
encoder = nv_drm_encoder_new(dev,
|
||||
displayInfo->handle,
|
||||
connectorInfo->signalFormat,
|
||||
get_crtc_mask(dev, connectorInfo->headMask));
|
||||
get_crtc_mask(dev, displayInfo->headMask));
|
||||
|
||||
if (IS_ERR(encoder)) {
|
||||
ret = PTR_ERR(encoder);
|
||||
|
||||
@@ -150,6 +150,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
|
||||
if (nv_fb->nv_gem[i] != NULL) {
|
||||
if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
|
||||
nv_fb->nv_gem[i]->pMemory)) {
|
||||
NV_DRM_DEV_LOG_INFO(
|
||||
nv_dev,
|
||||
"Framebuffer memory not appropriate for scanout");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
|
||||
params.planes[i].offset = nv_fb->base.offsets[i];
|
||||
params.planes[i].pitch = nv_fb->base.pitches[i];
|
||||
@@ -164,6 +172,17 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
params.layout = (modifier & 0x10) ?
|
||||
NvKmsSurfaceMemoryLayoutBlockLinear :
|
||||
NvKmsSurfaceMemoryLayoutPitch;
|
||||
|
||||
// See definition of DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D, we are testing
|
||||
// 'c', the lossless compression field of the modifier
|
||||
if (params.layout == NvKmsSurfaceMemoryLayoutBlockLinear &&
|
||||
(modifier >> 23) & 0x7) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Cannot create FB from compressible surface allocation");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
params.log2GobsPerBlockY = modifier & 0xf;
|
||||
} else {
|
||||
params.explicit_layout = false;
|
||||
@@ -174,11 +193,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, ¶ms);
|
||||
if (nv_fb->pSurface == NULL) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Failed to create NvKmsKapiSurface");
|
||||
drm_framebuffer_cleanup(&nv_fb->base);
|
||||
return -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
drm_framebuffer_cleanup(&nv_fb->base);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
|
||||
|
||||
@@ -31,17 +31,28 @@
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-dma-resv-helper.h"
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
|
||||
#include "nvidia-dma-fence-helper.h"
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
struct nv_drm_device *nv_dev;
|
||||
struct nv_drm_fence_context;
|
||||
|
||||
struct nv_drm_fence_context_ops {
|
||||
void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
|
||||
};
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
const struct nv_drm_fence_context_ops *ops;
|
||||
|
||||
struct nv_drm_device *nv_dev;
|
||||
uint32_t context;
|
||||
};
|
||||
|
||||
struct nv_drm_prime_fence_context {
|
||||
struct nv_drm_fence_context base;
|
||||
|
||||
NvU64 fenceSemIndex; /* Index into semaphore surface */
|
||||
|
||||
@@ -53,10 +64,10 @@ struct nv_drm_fence_context {
|
||||
spinlock_t lock;
|
||||
|
||||
/*
|
||||
* Software signaling structures. __nv_drm_fence_context_new()
|
||||
* allocates channel event and __nv_drm_fence_context_destroy() frees it.
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
* Software signaling structures. __nv_drm_prime_fence_context_new()
|
||||
* allocates channel event and __nv_drm_prime_fence_context_destroy() frees
|
||||
* it. There are no simultaneous read/write access to 'cb', therefore it
|
||||
* does not require spin-lock protection.
|
||||
*/
|
||||
struct NvKmsKapiChannelEvent *cb;
|
||||
|
||||
@@ -79,7 +90,7 @@ struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
|
||||
}
|
||||
|
||||
static const char*
|
||||
nv_drm_gem_prime_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
{
|
||||
return "NVIDIA";
|
||||
}
|
||||
@@ -122,7 +133,7 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
|
||||
}
|
||||
|
||||
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
|
||||
.get_driver_name = nv_drm_gem_prime_fence_op_get_driver_name,
|
||||
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
|
||||
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
|
||||
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
|
||||
.release = nv_drm_gem_prime_fence_op_release,
|
||||
@@ -138,7 +149,7 @@ __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
|
||||
}
|
||||
|
||||
static void nv_drm_gem_prime_force_fence_signal(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
struct nv_drm_prime_fence_context *nv_fence_context)
|
||||
{
|
||||
WARN_ON(!spin_is_locked(&nv_fence_context->lock));
|
||||
|
||||
@@ -158,7 +169,7 @@ static void nv_drm_gem_prime_fence_event
|
||||
NvU32 dataU32
|
||||
)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context = dataPtr;
|
||||
struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
@@ -187,11 +198,53 @@ static void nv_drm_gem_prime_fence_event
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_fence_context_create_params *p)
|
||||
static inline struct nv_drm_prime_fence_context*
|
||||
to_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
|
||||
return (struct nv_drm_prime_fence_context *)nv_fence_context;
|
||||
}
|
||||
|
||||
static void __nv_drm_prime_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context;
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
to_prime_fence_context(nv_fence_context);
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_prime_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_prime_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
|
||||
.destroy = __nv_drm_prime_fence_context_destroy,
|
||||
};
|
||||
|
||||
static inline struct nv_drm_prime_fence_context *
|
||||
__nv_drm_prime_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_prime_fence_context_create_params *p)
|
||||
{
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context;
|
||||
struct NvKmsKapiMemory *pSemSurface;
|
||||
NvU32 *pLinearAddress;
|
||||
|
||||
@@ -225,9 +278,9 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* event for it.
|
||||
*/
|
||||
|
||||
if ((nv_fence_context = nv_drm_calloc(
|
||||
if ((nv_prime_fence_context = nv_drm_calloc(
|
||||
1,
|
||||
sizeof(*nv_fence_context))) == NULL) {
|
||||
sizeof(*nv_prime_fence_context))) == NULL) {
|
||||
goto failed_alloc_fence_context;
|
||||
}
|
||||
|
||||
@@ -236,17 +289,18 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*nv_fence_context = (struct nv_drm_fence_context) {
|
||||
.nv_dev = nv_dev,
|
||||
.context = nv_dma_fence_context_alloc(1),
|
||||
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
|
||||
.base.ops = &nv_drm_prime_fence_context_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.pSemSurface = pSemSurface,
|
||||
.pLinearAddress = pLinearAddress,
|
||||
.fenceSemIndex = p->index,
|
||||
};
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence_context->pending);
|
||||
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
|
||||
|
||||
spin_lock_init(&nv_fence_context->lock);
|
||||
spin_lock_init(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* Except 'cb', the fence context should be completely initialized
|
||||
@@ -256,22 +310,22 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
*/
|
||||
nv_fence_context->cb =
|
||||
nv_prime_fence_context->cb =
|
||||
nvKms->allocateChannelEvent(nv_dev->pDevice,
|
||||
nv_drm_gem_prime_fence_event,
|
||||
nv_fence_context,
|
||||
nv_prime_fence_context,
|
||||
p->event_nvkms_params_ptr,
|
||||
p->event_nvkms_params_size);
|
||||
if (!nv_fence_context->cb) {
|
||||
if (!nv_prime_fence_context->cb) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev,
|
||||
"Failed to allocate fence signaling event");
|
||||
goto failed_to_allocate_channel_event;
|
||||
}
|
||||
|
||||
return nv_fence_context;
|
||||
return nv_prime_fence_context;
|
||||
|
||||
failed_to_allocate_channel_event:
|
||||
nv_drm_free(nv_fence_context);
|
||||
nv_drm_free(nv_prime_fence_context);
|
||||
|
||||
failed_alloc_fence_context:
|
||||
|
||||
@@ -287,38 +341,8 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __nv_drm_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context,
|
||||
unsigned int seqno)
|
||||
{
|
||||
struct nv_drm_prime_fence *nv_fence;
|
||||
@@ -329,14 +353,14 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* If seqno wrapped, force signal fences to make sure none of them
|
||||
* get stuck.
|
||||
*/
|
||||
if (seqno < nv_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
if (seqno < nv_prime_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence->list_entry);
|
||||
@@ -344,14 +368,17 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
spin_lock_init(&nv_fence->lock);
|
||||
|
||||
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
|
||||
&nv_fence->lock, nv_fence_context->context,
|
||||
&nv_fence->lock, nv_prime_fence_context->base.context,
|
||||
seqno);
|
||||
|
||||
list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
|
||||
/* The context maintains a reference to any pending fences. */
|
||||
nv_dma_fence_get(&nv_fence->base);
|
||||
|
||||
nv_fence_context->last_seqno = seqno;
|
||||
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
nv_prime_fence_context->last_seqno = seqno;
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
out:
|
||||
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
|
||||
@@ -385,12 +412,15 @@ static inline struct nv_drm_gem_fence_context *to_gem_fence_context(
|
||||
* because tear down sequence calls to flush all existing
|
||||
* worker thread.
|
||||
*/
|
||||
static void __nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
static void
|
||||
__nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context =
|
||||
to_gem_fence_context(nv_gem);
|
||||
struct nv_drm_fence_context *nv_fence_context =
|
||||
nv_gem_fence_context->nv_fence_context;
|
||||
|
||||
__nv_drm_fence_context_destroy(nv_gem_fence_context->nv_fence_context);
|
||||
nv_fence_context->ops->destroy(nv_fence_context);
|
||||
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
}
|
||||
@@ -400,7 +430,8 @@ const struct nv_drm_gem_object_funcs nv_gem_fence_context_ops = {
|
||||
};
|
||||
|
||||
static inline
|
||||
struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
struct nv_drm_gem_fence_context *
|
||||
__nv_drm_gem_object_fence_context_lookup(
|
||||
struct drm_device *dev,
|
||||
struct drm_file *filp,
|
||||
u32 handle)
|
||||
@@ -416,11 +447,13 @@ struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
return to_gem_fence_context(nv_gem);
|
||||
}
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
static int
|
||||
__nv_drm_gem_fence_context_create(struct drm_device *dev,
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
u32 *handle,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_fence_context_create_params *p = data;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context = NULL;
|
||||
|
||||
if ((nv_gem_fence_context = nv_drm_calloc(
|
||||
@@ -429,10 +462,7 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((nv_gem_fence_context->nv_fence_context =
|
||||
__nv_drm_fence_context_new(nv_dev, p)) == NULL) {
|
||||
goto fence_context_new_failed;
|
||||
}
|
||||
nv_gem_fence_context->nv_fence_context = nv_fence_context;
|
||||
|
||||
nv_drm_gem_object_init(nv_dev,
|
||||
&nv_gem_fence_context->base,
|
||||
@@ -442,26 +472,51 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
|
||||
return nv_drm_gem_handle_create_drop_reference(filep,
|
||||
&nv_gem_fence_context->base,
|
||||
&p->handle);
|
||||
|
||||
fence_context_new_failed:
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
handle);
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_prime_fence_context_create_params *p = data;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
__nv_drm_prime_fence_context_new(nv_dev, p);
|
||||
int err;
|
||||
|
||||
if (!nv_prime_fence_context) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = __nv_drm_gem_fence_context_create(dev,
|
||||
&nv_prime_fence_context->base,
|
||||
&p->handle,
|
||||
filep);
|
||||
if (err) {
|
||||
__nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_gem_fence_attach_params *p = data;
|
||||
struct drm_nvidia_gem_prime_fence_attach_params *p = data;
|
||||
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context;
|
||||
|
||||
nv_dma_fence_t *fence;
|
||||
nv_dma_resv_t *resv;
|
||||
|
||||
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
|
||||
|
||||
@@ -487,9 +542,22 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
goto fence_context_lookup_failed;
|
||||
}
|
||||
|
||||
if (IS_ERR(fence = __nv_drm_fence_context_create_fence(
|
||||
nv_gem_fence_context->nv_fence_context,
|
||||
p->sem_thresh))) {
|
||||
if (nv_gem_fence_context->nv_fence_context->ops !=
|
||||
&nv_drm_prime_fence_context_ops) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Wrong fence context type: 0x%08x",
|
||||
p->fence_context_handle);
|
||||
|
||||
goto fence_context_create_fence_failed;
|
||||
}
|
||||
|
||||
fence = __nv_drm_prime_fence_context_create_fence(
|
||||
to_prime_fence_context(nv_gem_fence_context->nv_fence_context),
|
||||
p->sem_thresh);
|
||||
|
||||
if (IS_ERR(fence)) {
|
||||
ret = PTR_ERR(fence);
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
@@ -499,18 +567,23 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
goto fence_context_create_fence_failed;
|
||||
}
|
||||
|
||||
nv_dma_resv_lock(&nv_gem->resv, NULL);
|
||||
resv = nv_drm_gem_res_obj(nv_gem);
|
||||
|
||||
ret = nv_dma_resv_reserve_fences(&nv_gem->resv, 1, false);
|
||||
nv_dma_resv_lock(resv, NULL);
|
||||
|
||||
ret = nv_dma_resv_reserve_fences(resv, 1, false);
|
||||
if (ret == 0) {
|
||||
nv_dma_resv_add_excl_fence(&nv_gem->resv, fence);
|
||||
nv_dma_resv_add_excl_fence(resv, fence);
|
||||
} else {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to reserve fence. Error code: %d", ret);
|
||||
}
|
||||
|
||||
nv_dma_resv_unlock(&nv_gem->resv);
|
||||
nv_dma_resv_unlock(resv);
|
||||
|
||||
/* dma_resv_add_excl_fence takes its own reference to the fence. */
|
||||
nv_dma_fence_put(fence);
|
||||
|
||||
fence_context_create_fence_failed:
|
||||
nv_drm_gem_object_unreference_unlocked(&nv_gem_fence_context->base);
|
||||
@@ -35,11 +35,11 @@ struct drm_device;
|
||||
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
#endif /* NV_DRM_FENCE_AVAILABLE */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
|
||||
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
|
||||
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
|
||||
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
|
||||
#if defined(DRM_FORMAT_ABGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
|
||||
#endif
|
||||
#if defined(DRM_FORMAT_XBGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
|
||||
#endif
|
||||
|
||||
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
|
||||
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,
|
||||
|
||||
@@ -95,7 +95,7 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
|
||||
pfn >>= PAGE_SHIFT;
|
||||
pfn += page_offset;
|
||||
} else {
|
||||
BUG_ON(page_offset > nv_nvkms_memory->pages_count);
|
||||
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
|
||||
}
|
||||
|
||||
@@ -131,11 +131,11 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
|
||||
const struct nv_drm_gem_object *nv_gem_src);
|
||||
|
||||
static int __nv_drm_gem_nvkms_map(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct NvKmsKapiMemory *pMemory,
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory,
|
||||
uint64_t size)
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
|
||||
struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
|
||||
|
||||
if (!nv_dev->hasVideoMemory) {
|
||||
return 0;
|
||||
}
|
||||
@@ -153,7 +153,7 @@ static int __nv_drm_gem_nvkms_map(
|
||||
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
|
||||
(uintptr_t)nv_nvkms_memory->pPhysicalAddress,
|
||||
size);
|
||||
nv_nvkms_memory->base.base.size);
|
||||
|
||||
if (!nv_nvkms_memory->pWriteCombinedIORemapAddress) {
|
||||
NV_DRM_DEV_LOG_INFO(
|
||||
@@ -167,6 +167,22 @@ static int __nv_drm_gem_nvkms_map(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
}
|
||||
|
||||
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
@@ -176,10 +192,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_dev,
|
||||
nv_nvkms_memory->base.pMemory,
|
||||
nv_nvkms_memory,
|
||||
nv_nvkms_memory->base.base.size);
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -201,7 +214,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
sg_table = nv_drm_prime_pages_to_sg(nv_dev->dev,
|
||||
@@ -214,6 +227,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
|
||||
.free = __nv_drm_gem_nvkms_memory_free,
|
||||
.prime_dup = __nv_drm_gem_nvkms_prime_dup,
|
||||
.prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
|
||||
.mmap = __nv_drm_gem_nvkms_mmap,
|
||||
.handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
|
||||
.create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
|
||||
@@ -314,7 +328,7 @@ int nv_drm_dumb_create(
|
||||
* to use dumb buffers for software rendering, so they're not much use
|
||||
* without a CPU mapping.
|
||||
*/
|
||||
ret = __nv_drm_gem_nvkms_map(nv_dev, pMemory, nv_nvkms_memory, args->size);
|
||||
ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
nv_drm_gem_object_unreference_unlocked(&nv_nvkms_memory->base);
|
||||
goto fail;
|
||||
@@ -583,11 +597,13 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
int nv_drm_dumb_destroy(struct drm_file *file,
|
||||
struct drm_device *dev,
|
||||
uint32_t handle)
|
||||
{
|
||||
return drm_gem_handle_delete(file, handle);
|
||||
}
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -97,9 +97,11 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
|
||||
struct drm_device *dev, uint32_t handle,
|
||||
uint64_t *offset);
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
int nv_drm_dumb_destroy(struct drm_file *file,
|
||||
struct drm_device *dev,
|
||||
uint32_t handle);
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
|
||||
struct drm_gem_object *nv_drm_gem_nvkms_prime_import(
|
||||
struct drm_device *dev,
|
||||
|
||||
@@ -92,9 +92,9 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vma->vm_flags &= ~VM_PFNMAP;
|
||||
vma->vm_flags &= ~VM_IO;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
nv_vm_flags_clear(vma, VM_PFNMAP);
|
||||
nv_vm_flags_clear(vma, VM_IO);
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -112,8 +112,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset > nv_user_memory->pages_count);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@@ -81,10 +81,13 @@ typedef struct dma_buf_map nv_sysio_map_t;
|
||||
static int nv_drm_gem_vmap(struct drm_gem_object *gem,
|
||||
nv_sysio_map_t *map)
|
||||
{
|
||||
map->vaddr = nv_drm_gem_prime_vmap(gem);
|
||||
if (map->vaddr == NULL) {
|
||||
void *vaddr = nv_drm_gem_prime_vmap(gem);
|
||||
if (vaddr == NULL) {
|
||||
return -ENOMEM;
|
||||
} else if (IS_ERR(vaddr)) {
|
||||
return PTR_ERR(vaddr);
|
||||
}
|
||||
map->vaddr = vaddr;
|
||||
map->is_iomem = true;
|
||||
return 0;
|
||||
}
|
||||
@@ -132,13 +135,8 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
|
||||
|
||||
/* Initialize the gem object */
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_dma_resv_init(&nv_gem->resv);
|
||||
|
||||
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_gem->base.resv = &nv_gem->resv;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(NV_DRM_DRIVER_HAS_GEM_FREE_OBJECT)
|
||||
@@ -212,8 +210,7 @@ void nv_drm_gem_prime_vunmap(struct drm_gem_object *gem, void *address)
|
||||
nv_dma_resv_t* nv_drm_gem_prime_res_obj(struct drm_gem_object *obj)
|
||||
{
|
||||
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(obj);
|
||||
|
||||
return &nv_gem->resv;
|
||||
return nv_drm_gem_res_obj(nv_gem);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -299,7 +296,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
vma->vm_flags &= ~VM_MAYWRITE;
|
||||
nv_vm_flags_clear(vma, VM_MAYWRITE);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@
|
||||
#include "nvidia-dma-resv-helper.h"
|
||||
#endif
|
||||
|
||||
#include "linux/dma-buf.h"
|
||||
|
||||
struct nv_drm_gem_object;
|
||||
|
||||
struct nv_drm_gem_object_funcs {
|
||||
@@ -71,7 +73,7 @@ struct nv_drm_gem_object {
|
||||
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_dma_resv_t resv;
|
||||
#endif
|
||||
};
|
||||
@@ -177,6 +179,17 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
|
||||
return drm_gem_handle_create(filp, &nv_gem->base, handle);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
return nv_gem->base.resv;
|
||||
#else
|
||||
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
const struct nv_drm_gem_object_funcs * const ops,
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
*/
|
||||
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
|
||||
#include "nvmisc.h"
|
||||
|
||||
@@ -148,6 +150,18 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
goto free;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
nv_drm_for_each_plane(plane, dev) {
|
||||
plane_state = drm_atomic_get_plane_state(state, plane);
|
||||
if (IS_ERR(plane_state)) {
|
||||
ret = PTR_ERR(plane_state);
|
||||
goto free;
|
||||
}
|
||||
|
||||
plane_state->rotation = DRM_MODE_ROTATE_0;
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
|
||||
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
|
||||
if (ret < 0)
|
||||
|
||||
@@ -35,6 +35,35 @@
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
/*
|
||||
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
|
||||
* (2017-09-26) and drm_dev_unref() is removed by
|
||||
@@ -277,11 +306,33 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
for_each_plane_in_state(__state, plane, plane_state, __i)
|
||||
#endif
|
||||
|
||||
static inline struct drm_crtc *nv_drm_crtc_find(struct drm_device *dev,
|
||||
uint32_t id)
|
||||
static inline struct drm_connector *
|
||||
nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
|
||||
uint32_t id)
|
||||
{
|
||||
#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
return drm_connector_find(dev, id);
|
||||
#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_connector_lookup(dev, filep, id);
|
||||
#else
|
||||
return drm_connector_lookup(dev, id);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_connector_put(struct drm_connector *connector)
|
||||
{
|
||||
#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
|
||||
drm_connector_put(connector);
|
||||
#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
drm_connector_unreference(connector);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct drm_crtc *
|
||||
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
|
||||
{
|
||||
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_crtc_find(dev, NULL /* file_priv */, id);
|
||||
return drm_crtc_find(dev, filep, id);
|
||||
#else
|
||||
return drm_crtc_find(dev, id);
|
||||
#endif
|
||||
@@ -297,6 +348,30 @@ static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
|
||||
#include <drm/drm_auth.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
|
||||
#include <drm/drm_file.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
|
||||
* pointers in drm_lease.c") in v5.15 (2021-07-20)
|
||||
*/
|
||||
static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
|
||||
{
|
||||
#if defined(NV_DRM_FILE_GET_MASTER_PRESENT)
|
||||
return drm_file_get_master(filep);
|
||||
#else
|
||||
if (filep->master) {
|
||||
return drm_master_get(filep->master);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* drm_connector_for_each_possible_encoder() is added by commit
|
||||
* 83aefbb887b59df0b3520965c3701e01deacfc52 which was Signed-off-by:
|
||||
|
||||
@@ -34,8 +34,8 @@
|
||||
#define DRM_NVIDIA_GEM_IMPORT_USERSPACE_MEMORY 0x02
|
||||
#define DRM_NVIDIA_GET_DEV_INFO 0x03
|
||||
#define DRM_NVIDIA_FENCE_SUPPORTED 0x04
|
||||
#define DRM_NVIDIA_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_GET_CLIENT_CAPABILITY 0x08
|
||||
#define DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY 0x09
|
||||
#define DRM_NVIDIA_GEM_MAP_OFFSET 0x0a
|
||||
@@ -43,6 +43,11 @@
|
||||
#define DRM_NVIDIA_GET_CRTC_CRC32_V2 0x0c
|
||||
#define DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY 0x0d
|
||||
#define DRM_NVIDIA_GEM_IDENTIFY_OBJECT 0x0e
|
||||
#define DRM_NVIDIA_DMABUF_SUPPORTED 0x0f
|
||||
#define DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID 0x10
|
||||
#define DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID 0x11
|
||||
#define DRM_NVIDIA_GRANT_PERMISSIONS 0x12
|
||||
#define DRM_NVIDIA_REVOKE_PERMISSIONS 0x13
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
|
||||
@@ -65,50 +70,69 @@
|
||||
#if defined(NV_LINUX)
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_DMABUF_SUPPORTED)
|
||||
#else
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED 0
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_CONTEXT_CREATE), \
|
||||
struct drm_nvidia_fence_context_create_params)
|
||||
#define DRM_IOCTL_NVIDIA_PRIME_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE),\
|
||||
struct drm_nvidia_prime_fence_context_create_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_fence_attach_params)
|
||||
#define DRM_IOCTL_NVIDIA_GEM_PRIME_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_prime_fence_attach_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
struct drm_nvidia_get_client_capability_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
|
||||
struct drm_nvidia_get_crtc_crc32_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
|
||||
struct drm_nvidia_get_crtc_crc32_v2_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
|
||||
struct drm_nvidia_gem_export_nvkms_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
|
||||
struct drm_nvidia_gem_map_offset_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
|
||||
struct drm_nvidia_gem_alloc_nvkms_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
|
||||
struct drm_nvidia_gem_export_dmabuf_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
struct drm_nvidia_gem_identify_object_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID),\
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID),\
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GRANT_PERMISSIONS), \
|
||||
struct drm_nvidia_grant_permissions_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_REVOKE_PERMISSIONS), \
|
||||
struct drm_nvidia_revoke_permissions_params)
|
||||
|
||||
struct drm_nvidia_gem_import_nvkms_memory_params {
|
||||
uint64_t mem_size; /* IN */
|
||||
|
||||
@@ -136,7 +160,7 @@ struct drm_nvidia_get_dev_info_params {
|
||||
uint32_t sector_layout; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_fence_context_create_params {
|
||||
struct drm_nvidia_prime_fence_context_create_params {
|
||||
uint32_t handle; /* OUT GEM handle to fence context */
|
||||
|
||||
uint32_t index; /* IN Index of semaphore to use for fencing */
|
||||
@@ -151,7 +175,7 @@ struct drm_nvidia_fence_context_create_params {
|
||||
uint64_t event_nvkms_params_size; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_gem_fence_attach_params {
|
||||
struct drm_nvidia_gem_prime_fence_attach_params {
|
||||
uint32_t handle; /* IN GEM handle to attach fence to */
|
||||
uint32_t fence_context_handle; /* IN GEM handle to fence context on which fence is run on */
|
||||
uint32_t sem_thresh; /* IN Semaphore value to reach before signal */
|
||||
@@ -232,4 +256,23 @@ struct drm_nvidia_gem_identify_object_params {
|
||||
drm_nvidia_gem_object_type object_type; /* OUT GEM object type */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params {
|
||||
uint32_t connectorId; /* IN */
|
||||
uint32_t dpyId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
uint32_t connectorId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_grant_permissions_params {
|
||||
int32_t fd; /* IN */
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_revoke_permissions_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
|
||||
|
||||
@@ -47,6 +47,14 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
void *nv_drm_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
size_t total_size = nmemb * size;
|
||||
//
|
||||
// Check for overflow.
|
||||
//
|
||||
if ((nmemb != 0) && ((total_size / nmemb) != size))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return kzalloc(nmemb * size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
@@ -93,8 +101,6 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
const int write = 1;
|
||||
const int force = 0;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
@@ -105,7 +111,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_GET_USER_PAGES(address, pages_count, write, force,
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
@@ -123,7 +129,7 @@ failed:
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
put_page(user_pages[i]);
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,8 +144,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
|
||||
for (i = 0; i < pages_count; i++) {
|
||||
set_page_dirty_lock(pages[i]);
|
||||
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
|
||||
nv_drm_free(pages);
|
||||
@@ -174,12 +179,7 @@ static void __exit nv_linux_drm_exit(void)
|
||||
module_init(nv_linux_drm_init);
|
||||
module_exit(nv_linux_drm_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
|
||||
to_nv_crtc_state(new_crtc_state);
|
||||
struct drm_plane_state *old_plane_state = NULL;
|
||||
struct drm_plane *plane = NULL;
|
||||
struct drm_plane *primary_plane = crtc->primary;
|
||||
bool primary_event = false;
|
||||
bool overlay_event = false;
|
||||
int i;
|
||||
|
||||
if (!old_crtc_state->active && !new_crtc_state->active) {
|
||||
@@ -134,16 +131,19 @@ static int __nv_drm_put_back_post_fence_fd(
|
||||
const struct NvKmsKapiLayerReplyConfig *layer_reply_config)
|
||||
{
|
||||
int fd = layer_reply_config->postSyncptFd;
|
||||
int ret = 0;
|
||||
|
||||
if ((fd >= 0) && (plane_state->fd_user_ptr != NULL)) {
|
||||
if (put_user(fd, plane_state->fd_user_ptr)) {
|
||||
return -EFAULT;
|
||||
ret = copy_to_user(plane_state->fd_user_ptr, &fd, sizeof(fd));
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*! set back to Null and let set_property specify it again */
|
||||
plane_state->fd_user_ptr = NULL;
|
||||
}
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __nv_drm_get_syncpt_data(
|
||||
@@ -274,6 +274,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
|
||||
nv_new_crtc_state->nv_flip = NULL;
|
||||
}
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
|
||||
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -122,6 +122,11 @@ struct nv_drm_device {
|
||||
NvBool supportsSyncpts;
|
||||
|
||||
struct drm_property *nv_out_fence_property;
|
||||
struct drm_property *nv_input_colorspace_property;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
@@ -16,7 +16,7 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-prime-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
|
||||
@@ -54,16 +54,21 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
@@ -100,6 +105,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
@@ -115,6 +121,15 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
@@ -307,7 +301,7 @@ static void _q_flush_function(void *args)
|
||||
static void _raw_q_flush(nv_kthread_q_t *q)
|
||||
{
|
||||
nv_kthread_q_item_t q_item;
|
||||
DECLARE_COMPLETION(completion);
|
||||
DECLARE_COMPLETION_ONSTACK(completion);
|
||||
|
||||
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
|
||||
|
||||
|
||||
@@ -34,6 +34,9 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <acpi/video.h>
|
||||
|
||||
#include "nvstatus.h"
|
||||
|
||||
@@ -62,6 +65,9 @@
|
||||
static bool output_rounding_fix = true;
|
||||
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
|
||||
|
||||
static bool disable_vrr_memclk_switch = false;
|
||||
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);
|
||||
|
||||
/* These parameters are used for fault injection tests. Normally the defaults
|
||||
* should be used. */
|
||||
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
|
||||
@@ -72,6 +78,15 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
|
||||
static bool malloc_verbose = false;
|
||||
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
|
||||
|
||||
/* This parameter is used to find the dpy override conf file */
|
||||
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
|
||||
|
||||
MODULE_PARM_DESC(config_file,
|
||||
"Path to the nvidia-modeset configuration file "
|
||||
"(default: disabled)");
|
||||
static char *nvkms_conf = NULL;
|
||||
module_param_named(config_file, nvkms_conf, charp, 0400);
|
||||
|
||||
static atomic_t nvkms_alloc_called_count;
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void)
|
||||
@@ -79,6 +94,11 @@ NvBool nvkms_output_rounding_fix(void)
|
||||
return output_rounding_fix;
|
||||
}
|
||||
|
||||
NvBool nvkms_disable_vrr_memclk_switch(void)
|
||||
{
|
||||
return disable_vrr_memclk_switch;
|
||||
}
|
||||
|
||||
#define NVKMS_SYNCPT_STUBS_NEEDED
|
||||
|
||||
/*************************************************************************
|
||||
@@ -180,7 +200,10 @@ static inline int nvkms_read_trylock_pm_lock(void)
|
||||
|
||||
static inline void nvkms_read_lock_pm_lock(void)
|
||||
{
|
||||
down_read(&nvkms_pm_lock);
|
||||
while (!down_read_trylock(&nvkms_pm_lock)) {
|
||||
try_to_freeze();
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nvkms_read_unlock_pm_lock(void)
|
||||
@@ -956,6 +979,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
struct nvkms_backlight_device *nvkms_bd = NULL;
|
||||
int i;
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
|
||||
if (gpu_info == NULL) {
|
||||
return NULL;
|
||||
@@ -1078,7 +1107,7 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvkms_close_common(struct nvkms_per_open *popen)
|
||||
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
{
|
||||
/*
|
||||
* Don't use down_interruptible(): we need to free resources
|
||||
@@ -1116,13 +1145,13 @@ void nvkms_close_common(struct nvkms_per_open *popen)
|
||||
nvkms_free(popen, sizeof(*popen));
|
||||
}
|
||||
|
||||
static void nvkms_close_deferred(void *data)
|
||||
static void nvkms_close_pm_unlocked(void *data)
|
||||
{
|
||||
struct nvkms_per_open *popen = data;
|
||||
|
||||
nvkms_read_lock_pm_lock();
|
||||
|
||||
nvkms_close_common(popen);
|
||||
nvkms_close_pm_locked(popen);
|
||||
|
||||
nvkms_read_unlock_pm_lock();
|
||||
}
|
||||
@@ -1130,11 +1159,11 @@ static void nvkms_close_deferred(void *data)
|
||||
static void nvkms_close_popen(struct nvkms_per_open *popen)
|
||||
{
|
||||
if (nvkms_read_trylock_pm_lock() == 0) {
|
||||
nvkms_close_common(popen);
|
||||
nvkms_close_pm_locked(popen);
|
||||
nvkms_read_unlock_pm_lock();
|
||||
} else {
|
||||
nv_kthread_q_item_init(&popen->deferred_close_q_item,
|
||||
nvkms_close_deferred,
|
||||
nvkms_close_pm_unlocked,
|
||||
popen);
|
||||
nvkms_queue_work(&nvkms_deferred_close_kthread_q,
|
||||
&popen->deferred_close_q_item);
|
||||
@@ -1187,7 +1216,7 @@ struct nvkms_per_open* nvkms_open_from_kapi
|
||||
|
||||
void nvkms_close_from_kapi(struct nvkms_per_open *popen)
|
||||
{
|
||||
nvkms_close_popen(popen);
|
||||
nvkms_close_pm_unlocked(popen);
|
||||
}
|
||||
|
||||
NvBool nvkms_ioctl_from_kapi
|
||||
@@ -1346,30 +1375,119 @@ static void nvkms_proc_exit(void)
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
proc_remove(nvkms_proc_dir);
|
||||
#else
|
||||
/*
|
||||
* On kernel versions without proc_remove(), we need to explicitly
|
||||
* remove each proc file beneath nvkms_proc_dir.
|
||||
* nvkms_proc_init() only creates files directly under
|
||||
* nvkms_proc_dir, so those are the only files we need to remove
|
||||
* here: warn if there is any deeper directory nesting.
|
||||
*/
|
||||
{
|
||||
struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
}
|
||||
|
||||
while (entry != NULL) {
|
||||
struct proc_dir_entry *next = entry->next;
|
||||
WARN_ON(entry->subdir != NULL);
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
entry = next;
|
||||
}
|
||||
/*************************************************************************
|
||||
* NVKMS Config File Read
|
||||
************************************************************************/
|
||||
static NvBool nvkms_fs_mounted(void)
|
||||
{
|
||||
return current->fs != NULL;
|
||||
}
|
||||
|
||||
static size_t nvkms_config_file_open
|
||||
(
|
||||
char *fname,
|
||||
char ** const buff
|
||||
)
|
||||
{
|
||||
int i = 0;
|
||||
struct file *file;
|
||||
struct inode *file_inode;
|
||||
size_t file_size = 0;
|
||||
size_t read_size = 0;
|
||||
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
|
||||
loff_t pos = 0;
|
||||
#endif
|
||||
|
||||
if (!nvkms_fs_mounted()) {
|
||||
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
|
||||
#endif /* NV_PROC_REMOVE_PRESENT */
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
file = filp_open(fname, O_RDONLY, 0);
|
||||
if (file == NULL || IS_ERR(file)) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to open %s\n",
|
||||
fname);
|
||||
return 0;
|
||||
}
|
||||
|
||||
file_inode = file->f_inode;
|
||||
if (file_inode == NULL || IS_ERR(file_inode)) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Inode is invalid\n");
|
||||
goto done;
|
||||
}
|
||||
file_size = file_inode->i_size;
|
||||
if (file_size > NVKMS_READ_FILE_MAX_SIZE) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: File exceeds maximum size\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
*buff = nvkms_alloc(file_size, NV_FALSE);
|
||||
if (*buff == NULL) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Once we have access to GPL symbols, this can be replaced with
|
||||
* kernel_read_file for kernels >= 4.6
|
||||
*/
|
||||
while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
|
||||
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
|
||||
ssize_t ret = kernel_read(file, *buff + read_size,
|
||||
file_size - read_size, &pos);
|
||||
#else
|
||||
ssize_t ret = kernel_read(file, read_size,
|
||||
*buff + read_size,
|
||||
file_size - read_size);
|
||||
#endif
|
||||
if (ret <= 0) {
|
||||
break;
|
||||
}
|
||||
read_size += ret;
|
||||
}
|
||||
|
||||
if (read_size != file_size) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to read %s\n",
|
||||
fname);
|
||||
goto done;
|
||||
}
|
||||
|
||||
filp_close(file, current->files);
|
||||
return file_size;
|
||||
|
||||
done:
|
||||
nvkms_free(*buff, file_size);
|
||||
filp_close(file, current->files);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* must be called with nvkms_lock locked */
|
||||
static void nvkms_read_config_file_locked(void)
|
||||
{
|
||||
char *buffer = NULL;
|
||||
size_t buf_size = 0;
|
||||
|
||||
/* only read the config file if the kernel parameter is set */
|
||||
if (!NVKMS_CONF_FILE_SPECIFIED) {
|
||||
return;
|
||||
}
|
||||
|
||||
buf_size = nvkms_config_file_open(nvkms_conf, &buffer);
|
||||
|
||||
if (buf_size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvKmsReadConf(buffer, buf_size, nvkms_config_file_open)) {
|
||||
printk(KERN_INFO NVKMS_LOG_PREFIX "Successfully read %s\n",
|
||||
nvkms_conf);
|
||||
}
|
||||
|
||||
nvkms_free(buffer, buf_size);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
@@ -1543,10 +1661,12 @@ static int __init nvkms_init(void)
|
||||
if (!nvKmsModuleLoad()) {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
up(&nvkms_lock);
|
||||
if (ret != 0) {
|
||||
up(&nvkms_lock);
|
||||
goto fail_module_load;
|
||||
}
|
||||
nvkms_read_config_file_locked();
|
||||
up(&nvkms_lock);
|
||||
|
||||
nvkms_proc_init();
|
||||
|
||||
@@ -1630,12 +1750,7 @@ restart:
|
||||
module_init(nvkms_init);
|
||||
module_exit(nvkms_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -40,17 +40,31 @@
|
||||
#include "nv_stdarg.h"
|
||||
|
||||
enum NvKmsSyncPtOp {
|
||||
/*
|
||||
* Call into Tegra's kernel nvhost driver, and allocate a syncpoint that can
|
||||
* be exclusively used by the caller. Internally, this operation will call
|
||||
* get() to set the initial refcount of the syncpoint to 1.
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_ALLOC,
|
||||
NVKMS_SYNCPT_OP_GET,
|
||||
/*
|
||||
* Decrease the refcount of an already allocated syncpoint. Once the
|
||||
* refcount drops to 0, the syncpoint will be returned to the free pool that
|
||||
* nvhost manages, so PUT can also be used to balance out an ALLOC.
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_PUT,
|
||||
NVKMS_SYNCPT_OP_INCR_MAX,
|
||||
NVKMS_SYNCPT_OP_CPU_INCR,
|
||||
/*
|
||||
* Extract syncpt id and thresh from the sync-file file descriptor
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
|
||||
/*
|
||||
* Create dma-fence from syncpt id and thresh value and create sync_file
|
||||
* file descriptor for the dma-fence handle created.
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
|
||||
/*
|
||||
* read syncpt minimum value of given syncpt
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_READ_MINVAL,
|
||||
NVKMS_SYNCPT_OP_READ_MAXVAL,
|
||||
NVKMS_SYNCPT_OP_SET_MIN_EQ_MAX,
|
||||
NVKMS_SYNCPT_OP_SET_MAXVAL,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -60,24 +74,10 @@ typedef struct {
|
||||
NvU32 id; /* out */
|
||||
} alloc;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} get;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} put;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 incr; /* in */
|
||||
NvU32 value; /* out */
|
||||
} incr_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} cpu_incr;
|
||||
|
||||
struct {
|
||||
NvS32 fd; /* in */
|
||||
NvU32 id; /* out */
|
||||
@@ -94,24 +94,12 @@ typedef struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 minval; /* out */
|
||||
} read_minval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 maxval; /* out */
|
||||
} read_maxval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} set_min_eq_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 val; /* in */
|
||||
} set_maxval;
|
||||
} NvKmsSyncPtOpParams;
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void);
|
||||
|
||||
NvBool nvkms_disable_vrr_memclk_switch(void);
|
||||
|
||||
void nvkms_call_rm (void *ops);
|
||||
void* nvkms_alloc (size_t size,
|
||||
NvBool zero);
|
||||
|
||||
@@ -85,15 +85,11 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
|
||||
@@ -42,6 +42,20 @@ typedef void nvkms_procfs_proc_t(void *data,
|
||||
char *buffer, size_t size,
|
||||
nvkms_procfs_out_string_func_t *outString);
|
||||
|
||||
/* max number of loops to prevent hanging the kernel if an edge case is hit */
|
||||
#define NVKMS_READ_FILE_MAX_LOOPS 1000
|
||||
/* max size for any file read by the config system */
|
||||
#define NVKMS_READ_FILE_MAX_SIZE 8192
|
||||
|
||||
/*
|
||||
* The read file callback should allocate a buffer pointed to by *buff, fill it
|
||||
* with the contents of fname, and return the size of the buffer. Buffer is not
|
||||
* guaranteed to be null-terminated. The caller is responsible for freeing the
|
||||
* buffer with nvkms_free, not nvFree.
|
||||
*/
|
||||
typedef size_t nvkms_config_read_file_func_t(char *fname,
|
||||
char ** const buff);
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
nvkms_procfs_proc_t *func;
|
||||
@@ -74,6 +88,9 @@ void nvKmsResume(NvU32 gpuId);
|
||||
|
||||
void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles);
|
||||
|
||||
NvBool nvKmsReadConf(const char *buff, size_t size,
|
||||
nvkms_config_read_file_func_t readfile);
|
||||
|
||||
void nvKmsKapiHandleEventQueueChange
|
||||
(
|
||||
struct NvKmsKapiDevice *device
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -94,11 +94,10 @@ struct nvidia_p2p_params {
|
||||
} nvidia_p2p_params_t;
|
||||
|
||||
/*
|
||||
* Capability flag for users to detect
|
||||
* Macro for users to detect
|
||||
* driver support for persistent pages.
|
||||
*/
|
||||
extern int nvidia_p2p_cap_persistent_pages;
|
||||
#define NVIDIA_P2P_CAP_PERSISTENT_PAGES
|
||||
#define NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
|
||||
|
||||
/*
|
||||
* This API is not supported.
|
||||
@@ -173,11 +172,6 @@ struct nvidia_p2p_page_table {
|
||||
* A pointer to the function to be invoked when the pages
|
||||
* underlying the virtual address range are freed
|
||||
* implicitly.
|
||||
* If NULL, persistent pages will be returned.
|
||||
* This means the pages underlying the range of GPU virtual memory
|
||||
* will persist until explicitly freed by nvidia_p2p_put_pages().
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
* MIG-enabled devices and vGPU.
|
||||
* @param[in] data
|
||||
* A non-NULL opaque pointer to private data to be passed to the
|
||||
* callback function.
|
||||
@@ -190,12 +184,48 @@ struct nvidia_p2p_page_table {
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address, uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data), void *data);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Pin and make the pages underlying a range of GPU virtual memory
|
||||
* accessible to a third-party device. The pages will persist until
|
||||
* explicitly freed by nvidia_p2p_put_pages_persistent().
|
||||
*
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
* MIG-enabled devices and vGPU.
|
||||
*
|
||||
* This API only supports pinned, GPU-resident memory, such as that provided
|
||||
* by cudaMalloc().
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* Address must be aligned to the 64KB boundary.
|
||||
* @param[in] length
|
||||
* The length of the requested P2P mapping.
|
||||
* Length must be a multiple of 64KB.
|
||||
* @param[out] page_table
|
||||
* A pointer to an array of structures with P2P PTEs.
|
||||
* @param[in] flags
|
||||
* Must be set to zero for now.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -ENOTSUPP if the requested operation is not supported.
|
||||
* -ENOMEM if the driver failed to allocate memory or if
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages_persistent(uint64_t virtual_address,
|
||||
uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data),
|
||||
void *data);
|
||||
uint32_t flags);
|
||||
|
||||
#define NVIDIA_P2P_DMA_MAPPING_VERSION 0x00020003
|
||||
|
||||
@@ -268,6 +298,8 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
* Release a set of pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] p2p_token
|
||||
* A token that uniquely identifies the P2P mapping.
|
||||
* @param[in] va_space
|
||||
@@ -282,10 +314,33 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token,
|
||||
uint32_t va_space, uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Release a set of persistent pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* @param[in] page_table
|
||||
* A pointer to the array of structures with P2P PTEs.
|
||||
* @param[in] flags
|
||||
* Must be set to zero for now.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages_persistent(uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table,
|
||||
uint32_t flags);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Free a third-party P2P page table. (This function is a no-op.)
|
||||
|
||||
@@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
|
||||
# MOFED's Module.symvers is needed for the build
|
||||
# to find the additional ib_* symbols.
|
||||
#
|
||||
# Also, MOFED doesn't use kbuild ARCH names.
|
||||
# So adapt OFA_ARCH to match MOFED's conventions.
|
||||
#
|
||||
ifeq ($(ARCH), arm64)
|
||||
OFA_ARCH := aarch64
|
||||
else ifeq ($(ARCH), powerpc)
|
||||
OFA_ARCH := ppc64le
|
||||
else
|
||||
OFA_ARCH := $(ARCH)
|
||||
endif
|
||||
OFA_DIR := /usr/src/ofa_kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
|
||||
if [ -d "$$d" ]; then \
|
||||
echo "$$d"; \
|
||||
|
||||
@@ -284,8 +284,9 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
static void nv_mem_put_pages_common(int nc,
|
||||
struct sg_table *sg_head,
|
||||
void *context)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
@@ -302,8 +303,13 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
if (nv_mem_context->callback_task == current)
|
||||
return;
|
||||
|
||||
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table);
|
||||
if (nc) {
|
||||
ret = nvidia_p2p_put_pages_persistent(nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table, 0);
|
||||
} else {
|
||||
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table);
|
||||
}
|
||||
|
||||
#ifdef _DEBUG_ONLY_
|
||||
/* Here we expect an error in real life cases that should be ignored - not printed.
|
||||
@@ -318,6 +324,16 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
return;
|
||||
}
|
||||
|
||||
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
{
|
||||
nv_mem_put_pages_common(0, sg_head, context);
|
||||
}
|
||||
|
||||
static void nv_mem_put_pages_nc(struct sg_table *sg_head, void *context)
|
||||
{
|
||||
nv_mem_put_pages_common(1, sg_head, context);
|
||||
}
|
||||
|
||||
static void nv_mem_release(void *context)
|
||||
{
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
@@ -396,8 +412,9 @@ static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
nv_mem_context->core_context = core_context;
|
||||
nv_mem_context->page_size = GPU_PAGE_SIZE;
|
||||
|
||||
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, NULL, NULL);
|
||||
ret = nvidia_p2p_get_pages_persistent(nv_mem_context->page_virt_start,
|
||||
nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, 0);
|
||||
if (ret < 0) {
|
||||
peer_err("error %d while calling nvidia_p2p_get_pages() with NULL callback\n", ret);
|
||||
return ret;
|
||||
@@ -407,13 +424,13 @@ static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
}
|
||||
|
||||
static struct peer_memory_client nv_mem_client_nc = {
|
||||
.acquire = nv_mem_acquire,
|
||||
.get_pages = nv_mem_get_pages_nc,
|
||||
.dma_map = nv_dma_map,
|
||||
.dma_unmap = nv_dma_unmap,
|
||||
.put_pages = nv_mem_put_pages,
|
||||
.get_page_size = nv_mem_get_page_size,
|
||||
.release = nv_mem_release,
|
||||
.acquire = nv_mem_acquire,
|
||||
.get_pages = nv_mem_get_pages_nc,
|
||||
.dma_map = nv_dma_map,
|
||||
.dma_unmap = nv_dma_unmap,
|
||||
.put_pages = nv_mem_put_pages_nc,
|
||||
.get_page_size = nv_mem_get_page_size,
|
||||
.release = nv_mem_release,
|
||||
};
|
||||
|
||||
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
|
||||
@@ -477,9 +494,6 @@ static int __init nv_mem_client_init(void)
|
||||
}
|
||||
|
||||
// The nc client enables support for persistent pages.
|
||||
// Thanks to this check, nvidia-peermem requires the new symbol from nvidia.ko, which
|
||||
// prevents users to unintentionally load this module with unsupported nvidia.ko.
|
||||
BUG_ON(!nvidia_p2p_cap_persistent_pages);
|
||||
strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
|
||||
strcpy(nv_mem_client_nc.version, DRV_VERSION);
|
||||
reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
|
||||
|
||||
@@ -1,29 +1,33 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013 NVIDIA Corporation
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __cla06fsubch_h__
|
||||
#define __cla06fsubch_h__
|
||||
#ifndef _cla06fsubch_h_
|
||||
#define _cla06fsubch_h_
|
||||
|
||||
#define NVA06F_SUBCHANNEL_2D 3
|
||||
#define NVA06F_SUBCHANNEL_3D 0
|
||||
#define NVA06F_SUBCHANNEL_COMPUTE 1
|
||||
#define NVA06F_SUBCHANNEL_COPY_ENGINE 4
|
||||
#define NVA06F_SUBCHANNEL_I2M 2
|
||||
|
||||
#endif // {__cla06fsubch_h__}
|
||||
#endif // _cla06fsubch_h_
|
||||
|
||||
@@ -1,25 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _cla16f_h_
|
||||
#define _cla16f_h_
|
||||
@@ -30,9 +30,48 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
|
||||
/* class KEPLER_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for KEPLER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
*/
|
||||
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
|
||||
|
||||
/* pio method data structure */
|
||||
typedef volatile struct _cla16f_tag0 {
|
||||
NvV32 Reserved00[0x7c0];
|
||||
} NvA16FTypedef, KEPLER_ChannelGPFifoB;
|
||||
#define NVA16F_TYPEDEF KEPLER_CHANNELChannelGPFifo
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct _cla16f_tag1 {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} NvA16FControl, KeplerBControlGPFifo;
|
||||
/* fields and values */
|
||||
#define NVA16F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVA16F_SET_OBJECT (0x00000000)
|
||||
#define NVA16F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVA16F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVA16F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVA16F_ILLEGAL (0x00000004)
|
||||
#define NVA16F_ILLEGAL_HANDLE 31:0
|
||||
#define NVA16F_NOP (0x00000008)
|
||||
#define NVA16F_NOP_HANDLE 31:0
|
||||
#define NVA16F_SEMAPHOREA (0x00000010)
|
||||
@@ -100,6 +139,12 @@ extern "C" {
|
||||
#define NVA16F_SET_REFERENCE_COUNT 31:0
|
||||
#define NVA16F_WFI (0x00000078)
|
||||
#define NVA16F_WFI_HANDLE 31:0
|
||||
#define NVA16F_CRC_CHECK (0x0000007c)
|
||||
#define NVA16F_CRC_CHECK_VALUE 31:0
|
||||
#define NVA16F_YIELD (0x00000080)
|
||||
#define NVA16F_YIELD_OP 1:0
|
||||
#define NVA16F_YIELD_OP_NOP 0x00000000
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVA16F_GP_ENTRY__SIZE 8
|
||||
@@ -126,13 +171,28 @@ extern "C" {
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVA16F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVA16F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVA16F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVA16F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVA16F_DMA_TERT_OP 17:16
|
||||
#define NVA16F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVA16F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVA16F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVA16F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVA16F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVA16F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVA16F_DMA_METHOD_COUNT 28:16
|
||||
#define NVA16F_DMA_IMMD_DATA 28:16
|
||||
#define NVA16F_DMA_SEC_OP 31:29
|
||||
#define NVA16F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVA16F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVA16F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVA16F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
|
||||
#define NVA16F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVA16F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVA16F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVA16F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVA16F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVA16F_DMA_INCR_SUBCHANNEL 15:13
|
||||
@@ -140,7 +200,6 @@ extern "C" {
|
||||
#define NVA16F_DMA_INCR_OPCODE 31:29
|
||||
#define NVA16F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVA16F_DMA_INCR_DATA 31:0
|
||||
|
||||
/* dma non-incrementing method format */
|
||||
#define NVA16F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVA16F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
@@ -148,13 +207,45 @@ extern "C" {
|
||||
#define NVA16F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVA16F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVA16F_DMA_NONINCR_DATA 31:0
|
||||
|
||||
/* dma increment-once method format */
|
||||
#define NVA16F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVA16F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVA16F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVA16F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVA16F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVA16F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVA16F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVA16F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVA16F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVA16F_DMA_IMMD_DATA 28:16
|
||||
#define NVA16F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVA16F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVA16F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVA16F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVA16F_DMA_ADDRESS 12:2
|
||||
#define NVA16F_DMA_SUBCH 15:13
|
||||
#define NVA16F_DMA_OPCODE3 17:16
|
||||
#define NVA16F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVA16F_DMA_COUNT 28:18
|
||||
#define NVA16F_DMA_OPCODE 31:29
|
||||
#define NVA16F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVA16F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVA16F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVidia Corporation
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
#ifndef _clb069_h_
|
||||
#define _clb069_h_
|
||||
|
||||
|
||||
@@ -1,28 +1,28 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _clB06f_h_
|
||||
#define _clB06f_h_
|
||||
#ifndef _clb06f_h_
|
||||
#define _clb06f_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -30,10 +30,46 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class MAXWELL_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for MAXWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
*/
|
||||
#define MAXWELL_CHANNEL_GPFIFO_A (0x0000B06F)
|
||||
|
||||
/* class MAXWELL_CHANNEL_GPFIFO */
|
||||
#define NVB06F_TYPEDEF MAXWELL_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct _clb06f_tag0 {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} Nvb06FControl, MaxwellAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVB06F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVB06F_SET_OBJECT (0x00000000)
|
||||
#define NVB06F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVB06F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVB06F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVB06F_ILLEGAL (0x00000004)
|
||||
#define NVB06F_ILLEGAL_HANDLE 31:0
|
||||
#define NVB06F_NOP (0x00000008)
|
||||
#define NVB06F_NOP_HANDLE 31:0
|
||||
#define NVB06F_SEMAPHOREA (0x00000010)
|
||||
@@ -47,6 +83,8 @@ extern "C" {
|
||||
#define NVB06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
|
||||
#define NVB06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
|
||||
#define NVB06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
|
||||
#define NVB06F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
|
||||
#define NVB06F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
|
||||
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
|
||||
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
|
||||
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
|
||||
@@ -56,8 +94,22 @@ extern "C" {
|
||||
#define NVB06F_SEMAPHORED_RELEASE_SIZE 24:24
|
||||
#define NVB06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
|
||||
#define NVB06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
|
||||
|
||||
#define NVB06F_SEMAPHORED_REDUCTION 30:27
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_MIN 0x00000000
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_MAX 0x00000001
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_XOR 0x00000002
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_AND 0x00000003
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_OR 0x00000004
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_ADD 0x00000005
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_INC 0x00000006
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_DEC 0x00000007
|
||||
#define NVB06F_SEMAPHORED_FORMAT 31:31
|
||||
#define NVB06F_SEMAPHORED_FORMAT_SIGNED 0x00000000
|
||||
#define NVB06F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVB06F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVB06F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVB06F_FB_FLUSH (0x00000024)
|
||||
#define NVB06F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
|
||||
// possible future MEM_OP features. MEM_OP_C/D have identical functionality
|
||||
// to the previous MEM_OP_A/B methods.
|
||||
@@ -84,10 +136,27 @@ extern "C" {
|
||||
#define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI 7:0
|
||||
#define NVB06F_SET_REFERENCE (0x00000050)
|
||||
#define NVB06F_SET_REFERENCE_COUNT 31:0
|
||||
#define NVB06F_WFI (0x00000078)
|
||||
#define NVB06F_WFI_SCOPE 0:0
|
||||
#define NVB06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVB06F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVB06F_CRC_CHECK (0x0000007c)
|
||||
#define NVB06F_CRC_CHECK_VALUE 31:0
|
||||
#define NVB06F_YIELD (0x00000080)
|
||||
#define NVB06F_YIELD_OP 1:0
|
||||
#define NVB06F_YIELD_OP_NOP 0x00000000
|
||||
#define NVB06F_YIELD_OP_PBDMA_TIMESLICE 0x00000001
|
||||
#define NVB06F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
|
||||
#define NVB06F_YIELD_OP_TSG 0x00000003
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVB06F_GP_ENTRY__SIZE 8
|
||||
#define NVB06F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVB06F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVB06F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVB06F_GP_ENTRY0_GET 31:2
|
||||
#define NVB06F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVB06F_GP_ENTRY1_GET_HI 7:0
|
||||
@@ -98,11 +167,38 @@ extern "C" {
|
||||
#define NVB06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVB06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVB06F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVB06F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVB06F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVB06F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVB06F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVB06F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVB06F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVB06F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVB06F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVB06F_DMA_TERT_OP 17:16
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVB06F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVB06F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVB06F_DMA_METHOD_COUNT 28:16
|
||||
#define NVB06F_DMA_IMMD_DATA 28:16
|
||||
#define NVB06F_DMA_SEC_OP 31:29
|
||||
#define NVB06F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVB06F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVB06F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVB06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVB06F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVB06F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVB06F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVB06F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVB06F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVB06F_DMA_INCR_SUBCHANNEL 15:13
|
||||
@@ -132,9 +228,33 @@ extern "C" {
|
||||
#define NVB06F_DMA_IMMD_DATA 28:16
|
||||
#define NVB06F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVB06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVB06F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVB06F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVB06F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVB06F_DMA_ADDRESS 12:2
|
||||
#define NVB06F_DMA_SUBCH 15:13
|
||||
#define NVB06F_DMA_OPCODE3 17:16
|
||||
#define NVB06F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVB06F_DMA_COUNT 28:18
|
||||
#define NVB06F_DMA_OPCODE 31:29
|
||||
#define NVB06F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVB06F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVB06F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clB06F_h_ */
|
||||
#endif /* _clb06f_h_ */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define MAXWELL_DMA_COPY_A (0x0000B0B5)
|
||||
|
||||
#define NVB0B5_NOP (0x00000100)
|
||||
#define NVB0B5_NOP_PARAMETER 31:0
|
||||
#define NVB0B5_PM_TRIGGER (0x00000140)
|
||||
#define NVB0B5_PM_TRIGGER_V 31:0
|
||||
#define NVB0B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVB0B5_SET_SEMAPHORE_A_UPPER 7:0
|
||||
#define NVB0B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -183,9 +187,75 @@ extern "C" {
|
||||
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_QUARTER_GOB (0x0000000E)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4 (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVB0B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVB0B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVB0B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVB0B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVB0B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVB0B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVB0B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVB0B5_SET_DST_LAYER_V 31:0
|
||||
#define NVB0B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVB0B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVB0B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_QUARTER_GOB (0x0000000E)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4 (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVB0B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVB0B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVB0B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVB0B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVB0B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVB0B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVB0B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVB0B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVB0B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVB0B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVB0B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVB0B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVB0B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clb0b5_h
|
||||
|
||||
|
||||
@@ -1,25 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc06f_h_
|
||||
#define _clc06f_h_
|
||||
@@ -30,10 +30,47 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class PASCAL_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for PASCAL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
* Note there is no .mfs file for this class.
|
||||
*/
|
||||
#define PASCAL_CHANNEL_GPFIFO_A (0x0000C06F)
|
||||
|
||||
/* class PASCAL_CHANNEL_GPFIFO_A */
|
||||
#define NVC06F_TYPEDEF PASCAL_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct Nvc06fControl_struct {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} Nvc06fControl, PascalAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVC06F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVC06F_SET_OBJECT (0x00000000)
|
||||
#define NVC06F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVC06F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVC06F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVC06F_ILLEGAL (0x00000004)
|
||||
#define NVC06F_ILLEGAL_HANDLE 31:0
|
||||
#define NVC06F_NOP (0x00000008)
|
||||
#define NVC06F_NOP_HANDLE 31:0
|
||||
#define NVC06F_SEMAPHOREA (0x00000010)
|
||||
@@ -47,54 +84,33 @@ extern "C" {
|
||||
#define NVC06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
|
||||
#define NVC06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
|
||||
#define NVC06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
|
||||
#define NVC06F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
|
||||
#define NVC06F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
|
||||
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
|
||||
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
|
||||
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVC06F_GP_ENTRY__SIZE 8
|
||||
#define NVC06F_GP_ENTRY0_GET 31:2
|
||||
#define NVC06F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC06F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC06F_GP_ENTRY1_PRIV 8:8
|
||||
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LENGTH 30:10
|
||||
|
||||
/* dma incrementing method format */
|
||||
#define NVC06F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_INCR_COUNT 28:16
|
||||
#define NVC06F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC06F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
|
||||
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC06F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
|
||||
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC06F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVC06F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVC06F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_IMMD_DATA 28:16
|
||||
#define NVC06F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
|
||||
#define NVC06F_SEMAPHORED_RELEASE_WFI 20:20
|
||||
#define NVC06F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
|
||||
#define NVC06F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
|
||||
#define NVC06F_SEMAPHORED_RELEASE_SIZE 24:24
|
||||
#define NVC06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
|
||||
#define NVC06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
|
||||
#define NVC06F_SEMAPHORED_REDUCTION 30:27
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_MIN 0x00000000
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_MAX 0x00000001
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_XOR 0x00000002
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_AND 0x00000003
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_OR 0x00000004
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_ADD 0x00000005
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_INC 0x00000006
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_DEC 0x00000007
|
||||
#define NVC06F_SEMAPHORED_FORMAT 31:31
|
||||
#define NVC06F_SEMAPHORED_FORMAT_SIGNED 0x00000000
|
||||
#define NVC06F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC06F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVC06F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVC06F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
|
||||
#define NVC06F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
|
||||
// specifying the page address for a targeted TLB invalidate and the uTLB for
|
||||
// a targeted REPLAY_CANCEL for UVM.
|
||||
@@ -153,19 +169,142 @@ extern "C" {
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
|
||||
// CLEAN_LINES is an alias for Tegra/GPU IP usage
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
// This B alias is confusing but it was missed as part of the update. Left here
|
||||
// for compatibility.
|
||||
#define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
|
||||
#define NVC06F_SET_REFERENCE (0x00000050)
|
||||
#define NVC06F_SET_REFERENCE_COUNT 31:0
|
||||
|
||||
// Syncpoint methods are only available on Tegra parts. Attempting to use
|
||||
// them on discrete GPUs will result in Host raising NV_PPBDMA_INTR_0_METHOD.
|
||||
#define NVC06F_SYNCPOINTA (0x00000070)
|
||||
#define NVC06F_SYNCPOINTA_PAYLOAD 31:0
|
||||
#define NVC06F_SYNCPOINTB (0x00000074)
|
||||
#define NVC06F_SYNCPOINTB_OPERATION 0:0
|
||||
#define NVC06F_SYNCPOINTB_OPERATION_WAIT 0x00000000
|
||||
#define NVC06F_SYNCPOINTB_OPERATION_INCR 0x00000001
|
||||
#define NVC06F_SYNCPOINTB_WAIT_SWITCH 4:4
|
||||
#define NVC06F_SYNCPOINTB_WAIT_SWITCH_DIS 0x00000000
|
||||
#define NVC06F_SYNCPOINTB_WAIT_SWITCH_EN 0x00000001
|
||||
#define NVC06F_SYNCPOINTB_SYNCPT_INDEX 19:8
|
||||
#define NVC06F_WFI (0x00000078)
|
||||
#define NVC06F_WFI_SCOPE 0:0
|
||||
#define NVC06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVC06F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVC06F_CRC_CHECK (0x0000007c)
|
||||
#define NVC06F_CRC_CHECK_VALUE 31:0
|
||||
#define NVC06F_YIELD (0x00000080)
|
||||
#define NVC06F_YIELD_OP 1:0
|
||||
#define NVC06F_YIELD_OP_NOP 0x00000000
|
||||
#define NVC06F_YIELD_OP_PBDMA_TIMESLICE 0x00000001
|
||||
#define NVC06F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
|
||||
#define NVC06F_YIELD_OP_TSG 0x00000003
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVC06F_GP_ENTRY__SIZE 8
|
||||
#define NVC06F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVC06F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVC06F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVC06F_GP_ENTRY0_GET 31:2
|
||||
#define NVC06F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC06F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC06F_GP_ENTRY1_PRIV 8:8
|
||||
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVC06F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVC06F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVC06F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVC06F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVC06F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVC06F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_TERT_OP 17:16
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVC06F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVC06F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVC06F_DMA_METHOD_COUNT 28:16
|
||||
#define NVC06F_DMA_IMMD_DATA 28:16
|
||||
#define NVC06F_DMA_SEC_OP 31:29
|
||||
#define NVC06F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVC06F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVC06F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVC06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVC06F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVC06F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVC06F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVC06F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVC06F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_INCR_COUNT 28:16
|
||||
#define NVC06F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC06F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
|
||||
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC06F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
|
||||
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC06F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVC06F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVC06F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_IMMD_DATA 28:16
|
||||
#define NVC06F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVC06F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVC06F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVC06F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVC06F_DMA_ADDRESS 12:2
|
||||
#define NVC06F_DMA_SUBCH 15:13
|
||||
#define NVC06F_DMA_OPCODE3 17:16
|
||||
#define NVC06F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVC06F_DMA_COUNT 28:18
|
||||
#define NVC06F_DMA_OPCODE 31:29
|
||||
#define NVC06F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVC06F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVC06F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define PASCAL_DMA_COPY_A (0x0000C0B5)
|
||||
|
||||
#define NVC0B5_NOP (0x00000100)
|
||||
#define NVC0B5_NOP_PARAMETER 31:0
|
||||
#define NVC0B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC0B5_PM_TRIGGER_V 31:0
|
||||
#define NVC0B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC0B5_SET_SEMAPHORE_A_UPPER 16:0
|
||||
#define NVC0B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -115,6 +119,10 @@ extern "C" {
|
||||
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
|
||||
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
|
||||
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC0B5_LAUNCH_DMA_RESERVED 31:28
|
||||
#define NVC0B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC0B5_OFFSET_IN_UPPER_UPPER 16:0
|
||||
#define NVC0B5_OFFSET_IN_LOWER (0x00000404)
|
||||
@@ -183,6 +191,68 @@ extern "C" {
|
||||
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC0B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC0B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC0B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC0B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC0B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC0B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC0B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC0B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC0B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC0B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC0B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC0B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC0B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC0B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC0B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC0B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC0B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC0B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC0B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC0B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC0B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC0B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC0B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC0B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define PASCAL_DMA_COPY_B (0x0000C1B5)
|
||||
|
||||
#define NVC1B5_NOP (0x00000100)
|
||||
#define NVC1B5_NOP_PARAMETER 31:0
|
||||
#define NVC1B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC1B5_PM_TRIGGER_V 31:0
|
||||
#define NVC1B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC1B5_SET_SEMAPHORE_A_UPPER 16:0
|
||||
#define NVC1B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -115,6 +119,14 @@ extern "C" {
|
||||
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
|
||||
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
|
||||
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC1B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC1B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC1B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC1B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC1B5_OFFSET_IN_UPPER_UPPER 16:0
|
||||
#define NVC1B5_OFFSET_IN_LOWER (0x00000404)
|
||||
@@ -183,6 +195,76 @@ extern "C" {
|
||||
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC1B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC1B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC1B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC1B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC1B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC1B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC1B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC1B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC1B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC1B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC1B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC1B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC1B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC1B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC1B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC1B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC1B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC1B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC1B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC1B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC1B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC1B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC1B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC1B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC1B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC1B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC1B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC1B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC1B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC1B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC1B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC1B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define VOLTA_DMA_COPY_A (0x0000C3B5)
|
||||
|
||||
#define NVC3B5_NOP (0x00000100)
|
||||
#define NVC3B5_NOP_PARAMETER 31:0
|
||||
#define NVC3B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC3B5_PM_TRIGGER_V 31:0
|
||||
#define NVC3B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC3B5_SET_SEMAPHORE_A_UPPER 16:0
|
||||
#define NVC3B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -69,6 +73,9 @@ extern "C" {
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
@@ -123,8 +130,6 @@ extern "C" {
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
|
||||
#define NVC3B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC3B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC3B5_OFFSET_IN_UPPER (0x00000400)
|
||||
@@ -195,6 +200,76 @@ extern "C" {
|
||||
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC3B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC3B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC3B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC3B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC3B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC3B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC3B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC3B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC3B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC3B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC3B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC3B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC3B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC3B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC3B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC3B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC3B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC3B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC3B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC3B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC3B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC3B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC3B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC3B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC3B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC3B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC3B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC3B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC3B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC3B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC3B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC3B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
97
kernel-open/nvidia-uvm/clcba2.h
Normal file
97
kernel-open/nvidia-uvm/clcba2.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clcba2_h_
|
||||
#define _clcba2_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HOPPER_SEC2_WORK_LAUNCH_A (0x0000CBA2)
|
||||
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI (0x00000400)
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO (0x00000404)
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI (0x00000408)
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO (0x0000040c)
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_DECRYPT_COPY_SIZE (0x00000410)
|
||||
#define NVCBA2_DECRYPT_COPY_SIZE_DATA 31:2
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI (0x00000414)
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO (0x00000418)
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI (0x0000041C)
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO (0x00000420)
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_SEMAPHORE_A (0x00000440)
|
||||
#define NVCBA2_SEMAPHORE_A_UPPER 24:0
|
||||
#define NVCBA2_SEMAPHORE_B (0x00000444)
|
||||
#define NVCBA2_SEMAPHORE_B_LOWER 31:2
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER (0x00000448)
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER_DATA 31:0
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000044C)
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER_DATA 31:0
|
||||
#define NVCBA2_SEMAPHORE_D (0x00000450)
|
||||
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR 0:0
|
||||
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_DISABLE (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_ENABLE (0x00000001)
|
||||
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE 1:1
|
||||
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_32_BIT (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_64_BIT (0x00000001)
|
||||
#define NVCBA2_SEMAPHORE_D_TIMESTAMP 2:2
|
||||
#define NVCBA2_SEMAPHORE_D_TIMESTAMP_DISABLE (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_TIMESTAMP_ENABLE (0x00000001)
|
||||
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE 3:3
|
||||
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_FALSE (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_TRUE (0x00000001)
|
||||
#define NVCBA2_EXECUTE (0x00000470)
|
||||
#define NVCBA2_EXECUTE_NOTIFY 0:0
|
||||
#define NVCBA2_EXECUTE_NOTIFY_DISABLE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ENABLE (0x00000001)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ON 1:1
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ON_END (0x00000000)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ON_BEGIN (0x00000001)
|
||||
#define NVCBA2_EXECUTE_FLUSH_DISABLE 2:2
|
||||
#define NVCBA2_EXECUTE_FLUSH_DISABLE_FALSE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_FLUSH_DISABLE_TRUE (0x00000001)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_INTR 3:3
|
||||
#define NVCBA2_EXECUTE_NOTIFY_INTR_DISABLE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_INTR_ENABLE (0x00000001)
|
||||
#define NVCBA2_EXECUTE_PAYLOAD_SIZE 4:4
|
||||
#define NVCBA2_EXECUTE_PAYLOAD_SIZE_32_BIT (0x00000000)
|
||||
#define NVCBA2_EXECUTE_PAYLOAD_SIZE_64_BIT (0x00000001)
|
||||
#define NVCBA2_EXECUTE_TIMESTAMP 5:5
|
||||
#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE (0x00000001)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clcba2_h
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2021 NVIDIA Corporation
|
||||
Copyright (c) 2013-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
|
||||
int result, node;
|
||||
nv_kthread_q_t local_q;
|
||||
|
||||
// If the API does not support CPU affinity, check whether the correct
|
||||
// error code is returned.
|
||||
// Non-affinitized queue allocation has been verified by previous test
|
||||
// so just ensure that the affinitized version also works.
|
||||
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
|
||||
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
|
||||
TEST_CHECK_RET(result == -ENOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for_each_online_node(node) {
|
||||
unsigned i;
|
||||
const unsigned max_i = 100;
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
@@ -307,7 +301,7 @@ static void _q_flush_function(void *args)
|
||||
static void _raw_q_flush(nv_kthread_q_t *q)
|
||||
{
|
||||
nv_kthread_q_item_t q_item;
|
||||
DECLARE_COMPLETION(completion);
|
||||
DECLARE_COMPLETION_ONSTACK(completion);
|
||||
|
||||
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
|
||||
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
@@ -58,6 +57,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
|
||||
@@ -72,6 +72,12 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
|
||||
@@ -94,7 +100,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
|
||||
|
||||
@@ -36,7 +36,7 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
@@ -67,17 +67,11 @@ endif
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
@@ -87,18 +81,21 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
@@ -107,6 +104,12 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_tools.h"
|
||||
@@ -35,93 +36,200 @@
|
||||
#include "uvm_linux_ioctl.h"
|
||||
#include "uvm_hmm.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
||||
#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
|
||||
|
||||
static dev_t g_uvm_base_dev;
|
||||
static struct cdev g_uvm_cdev;
|
||||
static const struct file_operations uvm_fops;
|
||||
|
||||
// List of fault service contexts for CPU faults
|
||||
static LIST_HEAD(g_cpu_service_block_context_list);
|
||||
|
||||
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
|
||||
|
||||
NV_STATUS uvm_service_block_context_init(void)
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
unsigned num_preallocated_contexts = 4;
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
|
||||
{
|
||||
unsigned long uptr;
|
||||
uvm_fd_type_t type;
|
||||
void *ptr;
|
||||
|
||||
// Pre-allocate some fault service contexts for the CPU and add them to the global list
|
||||
while (num_preallocated_contexts-- > 0) {
|
||||
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
|
||||
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
|
||||
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
|
||||
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
|
||||
|
||||
switch (type) {
|
||||
case UVM_FD_UNINITIALIZED:
|
||||
case UVM_FD_INITIALIZING:
|
||||
UVM_ASSERT(!ptr);
|
||||
break;
|
||||
|
||||
case UVM_FD_VA_SPACE:
|
||||
UVM_ASSERT(ptr);
|
||||
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
case UVM_FD_MM:
|
||||
UVM_ASSERT(ptr);
|
||||
BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
}
|
||||
|
||||
if (ptr_val)
|
||||
*ptr_val = ptr;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
|
||||
|
||||
if (uvm_fd_type(filp, &ptr) == type)
|
||||
return ptr;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_space_mm_t *va_space_mm;
|
||||
struct file *uvm_file;
|
||||
uvm_fd_type_t old_fd_type;
|
||||
struct mm_struct *mm;
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_file = fget(params->uvmFd);
|
||||
if (!uvm_file_is_nvidia_uvm(uvm_file)) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (uvm_fd_type(uvm_file, (void **)&va_space) != UVM_FD_VA_SPACE) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
// Tell userspace the MM FD is not required and it may be released
|
||||
// with no loss of functionality.
|
||||
if (!uvm_va_space_mm_enabled(va_space)) {
|
||||
status = NV_WARN_NOTHING_TO_DO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type != UVM_FD_UNINITIALIZED) {
|
||||
status = NV_ERR_IN_USE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
va_space_mm = &va_space->va_space_mm;
|
||||
uvm_spin_lock(&va_space_mm->lock);
|
||||
switch (va_space->va_space_mm.state) {
|
||||
// We only allow the va_space_mm to be initialised once. If
|
||||
// userspace passed the UVM FD to another process it is up to
|
||||
// userspace to ensure it also passes the UVM MM FD that
|
||||
// initialised the va_space_mm or arranges some other way to keep
|
||||
// a reference on the FD.
|
||||
case UVM_VA_SPACE_MM_STATE_ALIVE:
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto err_release_unlock;
|
||||
break;
|
||||
|
||||
// Once userspace has released the va_space_mm the GPU is
|
||||
// effectively dead and no new work can be started. We don't
|
||||
// support re-initializing once userspace has closed the FD.
|
||||
case UVM_VA_SPACE_MM_STATE_RELEASED:
|
||||
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
goto err_release_unlock;
|
||||
break;
|
||||
|
||||
// Keep the warnings at bay
|
||||
case UVM_VA_SPACE_MM_STATE_UNINITIALIZED:
|
||||
mm = va_space->va_space_mm.mm;
|
||||
if (!mm || !mmget_not_zero(mm)) {
|
||||
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
goto err_release_unlock;
|
||||
}
|
||||
|
||||
va_space_mm->state = UVM_VA_SPACE_MM_STATE_ALIVE;
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
break;
|
||||
}
|
||||
uvm_spin_unlock(&va_space_mm->lock);
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err_release_unlock:
|
||||
uvm_spin_unlock(&va_space_mm->lock);
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
|
||||
|
||||
err:
|
||||
if (uvm_file)
|
||||
fput(uvm_file);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_service_block_context_exit(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context, *service_context_tmp;
|
||||
|
||||
// Free fault service contexts for the CPU and add clear the global list
|
||||
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
|
||||
cpu_fault.service_context_list) {
|
||||
uvm_kvfree(service_context);
|
||||
}
|
||||
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
// Get a fault service context from the global list or allocate a new one if there are no
|
||||
// available entries
|
||||
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context;
|
||||
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
|
||||
cpu_fault.service_context_list);
|
||||
|
||||
if (service_context)
|
||||
list_del(&service_context->cpu_fault.service_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
if (!service_context)
|
||||
service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
|
||||
return service_context;
|
||||
}
|
||||
|
||||
// Put a fault service context in the global list
|
||||
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
}
|
||||
|
||||
// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
|
||||
// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
|
||||
// lock will need to be taken.
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
|
||||
if (status == NV_OK) {
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_create(inode, filp);
|
||||
mapping = uvm_kvmalloc(sizeof(*mapping));
|
||||
if (!mapping)
|
||||
return -ENOMEM;
|
||||
|
||||
uvm_up_read(&g_uvm_global.pm.lock);
|
||||
}
|
||||
// By default all struct files on the same inode share the same
|
||||
// address_space structure (the inode's) across all processes. This means
|
||||
// unmap_mapping_range would unmap virtual mappings across all processes on
|
||||
// that inode.
|
||||
//
|
||||
// Since the UVM driver uses the mapping offset as the VA of the file's
|
||||
// process, we need to isolate the mappings to each process.
|
||||
address_space_init_once(mapping);
|
||||
mapping->host = inode;
|
||||
|
||||
return -nv_status_to_errno(status);
|
||||
// Some paths in the kernel, for example force_page_cache_readahead which
|
||||
// can be invoked from user-space via madvise MADV_WILLNEED and fadvise
|
||||
// POSIX_FADV_WILLNEED, check the function pointers within
|
||||
// file->f_mapping->a_ops for validity. However, those paths assume that a_ops
|
||||
// itself is always valid. Handle that by using the inode's a_ops pointer,
|
||||
// which is what f_mapping->a_ops would point to anyway if we weren't re-
|
||||
// assigning f_mapping.
|
||||
mapping->a_ops = inode->i_mapping->a_ops;
|
||||
|
||||
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
|
||||
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
|
||||
#endif
|
||||
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = mapping;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int uvm_open_entry(struct inode *inode, struct file *filp)
|
||||
@@ -145,11 +253,44 @@ static void uvm_release_deferred(void *data)
|
||||
uvm_up_read(&g_uvm_global.pm.lock);
|
||||
}
|
||||
|
||||
static void uvm_mm_release(struct file *filp, struct file *uvm_file)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
|
||||
uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
|
||||
struct mm_struct *mm = va_space_mm->mm;
|
||||
|
||||
if (uvm_va_space_mm_enabled(va_space)) {
|
||||
uvm_va_space_mm_unregister(va_space);
|
||||
|
||||
if (uvm_va_space_mm_enabled(va_space))
|
||||
uvm_mmput(mm);
|
||||
|
||||
va_space_mm->mm = NULL;
|
||||
fput(uvm_file);
|
||||
}
|
||||
}
|
||||
|
||||
static int uvm_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
void *ptr;
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_fd_type_t fd_type;
|
||||
int ret;
|
||||
|
||||
fd_type = uvm_fd_type(filp, &ptr);
|
||||
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
|
||||
if (fd_type == UVM_FD_UNINITIALIZED) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
return 0;
|
||||
}
|
||||
else if (fd_type == UVM_FD_MM) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
uvm_mm_release(filp, (struct file *)ptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
|
||||
va_space = (uvm_va_space_t *)ptr;
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = NULL;
|
||||
|
||||
@@ -167,7 +308,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
|
||||
// been destroyed, and va_space->mapping won't be used again. Still,
|
||||
// the va_space survives the inode if its destruction is deferred, in
|
||||
// which case the references are rendered stale.
|
||||
address_space_init_once(&va_space->mapping);
|
||||
address_space_init_once(va_space->mapping);
|
||||
|
||||
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
|
||||
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
|
||||
@@ -430,14 +571,12 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
|
||||
static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_processor_id_t gpu_id;
|
||||
bool make_zombie = false;
|
||||
|
||||
if (current->mm != NULL)
|
||||
uvm_record_lock_mmap_lock_write(current->mm);
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
// current->mm will be NULL on process teardown, in which case we have
|
||||
// special handling.
|
||||
if (current->mm == NULL) {
|
||||
@@ -467,13 +606,11 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
|
||||
uvm_destroy_vma_managed(vma, make_zombie);
|
||||
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
|
||||
// that can be attributed to new VA ranges reallocated at the same address
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
|
||||
gpu_va_space->needs_fault_buffer_flush = true;
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to
|
||||
// avoid finding stale entries that can be attributed to new VA ranges
|
||||
// reallocated at the same address.
|
||||
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
|
||||
}
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
@@ -489,139 +626,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
|
||||
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_block_t *va_block;
|
||||
NvU64 fault_addr = nv_page_fault_va(vmf);
|
||||
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
bool tools_enabled;
|
||||
bool major_fault = false;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_global_processor_mask_t gpus_to_check_for_ecc;
|
||||
|
||||
if (status != NV_OK)
|
||||
goto convert_error;
|
||||
|
||||
// TODO: Bug 2583279: Lock tracking is disabled for the power management
|
||||
// lock in order to suppress reporting of a lock policy violation.
|
||||
// The violation consists in acquiring the power management lock multiple
|
||||
// times, and it is manifested as an error during release. The
|
||||
// re-acquisition of the power management locks happens upon re-entry in the
|
||||
// UVM module, and it is benign on itself, but when combined with certain
|
||||
// power management scenarios, it is indicative of a potential deadlock.
|
||||
// Tracking will be re-enabled once the power management locking strategy is
|
||||
// modified to avoid deadlocks.
|
||||
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
goto convert_error;
|
||||
}
|
||||
|
||||
service_context = uvm_service_block_context_cpu_alloc();
|
||||
if (!service_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
service_context->cpu_fault.wakeup_time_stamp = 0;
|
||||
|
||||
// The mmap_lock might be held in write mode, but the mode doesn't matter
|
||||
// for the purpose of lock ordering and we don't rely on it being in write
|
||||
// anywhere so just record it as read mode in all cases.
|
||||
uvm_record_lock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
do {
|
||||
bool do_sleep = false;
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
NvU64 now = NV_GETTIME();
|
||||
if (now < service_context->cpu_fault.wakeup_time_stamp)
|
||||
do_sleep = true;
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
// Drop the VA space lock while we sleep
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// usleep_range is preferred because msleep has a 20ms granularity
|
||||
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
|
||||
// timers and, by adding a range, the Linux scheduler may coalesce
|
||||
// our wakeup with others, thus saving some interrupts.
|
||||
if (do_sleep) {
|
||||
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
|
||||
|
||||
usleep_range(nap_us, nap_us + nap_us / 2);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
|
||||
break;
|
||||
}
|
||||
|
||||
// Watch out, current->mm might not be vma->vm_mm
|
||||
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
|
||||
|
||||
// Loop until thrashing goes away.
|
||||
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
|
||||
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UvmEventFatalReason reason;
|
||||
|
||||
reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
|
||||
}
|
||||
|
||||
tools_enabled = va_space->tools.enabled;
|
||||
|
||||
if (status == NV_OK) {
|
||||
uvm_va_space_global_gpus_in_mask(va_space,
|
||||
&gpus_to_check_for_ecc,
|
||||
&service_context->cpu_fault.gpus_to_check_for_ecc);
|
||||
uvm_global_mask_retain(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
|
||||
uvm_global_mask_release(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
if (tools_enabled)
|
||||
uvm_tools_flush_events();
|
||||
|
||||
// Major faults involve I/O in order to resolve the fault.
|
||||
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
|
||||
// A process can also get statistics for major and minor faults by calling readproc().
|
||||
major_fault = service_context->cpu_fault.did_migrate;
|
||||
uvm_service_block_context_cpu_free(service_context);
|
||||
|
||||
unlock:
|
||||
// TODO: Bug 2583279: See the comment above the matching lock acquisition
|
||||
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
|
||||
|
||||
convert_error:
|
||||
switch (status) {
|
||||
case NV_OK:
|
||||
case NV_ERR_BUSY_RETRY:
|
||||
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
|
||||
case NV_ERR_NO_MEMORY:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
|
||||
}
|
||||
|
||||
|
||||
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
|
||||
@@ -752,7 +760,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
|
||||
|
||||
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
int ret = 0;
|
||||
@@ -761,8 +769,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_initialized(va_space);
|
||||
if (status != NV_OK)
|
||||
va_space = uvm_fd_va_space(filp);
|
||||
if (!va_space)
|
||||
return -EBADFD;
|
||||
|
||||
// When the VA space is associated with an mm, all vmas under the VA space
|
||||
@@ -814,7 +822,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
// Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
|
||||
// so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
|
||||
// with VM_IO, but that causes other mapping issues.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
|
||||
// Make the default behavior be VM_DONTCOPY to avoid the performance impact
|
||||
// of removing CPU mappings in the parent on fork()+exec(). Users can call
|
||||
// madvise(MDV_DOFORK) if the child process requires access to the
|
||||
// allocation.
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY);
|
||||
|
||||
vma->vm_ops = &uvm_vm_ops_managed;
|
||||
|
||||
@@ -874,6 +886,13 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool uvm_vma_is_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
return vma->vm_ops == &uvm_vm_ops_disabled ||
|
||||
vma->vm_ops == &uvm_vm_ops_managed ||
|
||||
vma->vm_ops == &uvm_vm_ops_semaphore_pool;
|
||||
}
|
||||
|
||||
static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_mmap(filp, vma));
|
||||
@@ -881,7 +900,56 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
|
||||
|
||||
static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
|
||||
{
|
||||
return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
|
||||
uvm_va_space_t *va_space;
|
||||
NV_STATUS status;
|
||||
uvm_fd_type_t old_fd_type;
|
||||
|
||||
// Normally we expect private_data == UVM_FD_UNINITIALIZED. However multiple
|
||||
// threads may call this ioctl concurrently so we have to be careful to
|
||||
// avoid initializing multiple va_spaces and/or leaking memory. To do this
|
||||
// we do an atomic compare and swap. Only one thread will observe
|
||||
// UVM_FD_UNINITIALIZED and that thread will allocate and setup the
|
||||
// va_space.
|
||||
//
|
||||
// Other threads will either see UVM_FD_INITIALIZING or UVM_FD_VA_SPACE. In
|
||||
// the case of UVM_FD_VA_SPACE we return success if and only if the
|
||||
// initialization flags match. If another thread is still initializing the
|
||||
// va_space we return NV_ERR_BUSY_RETRY.
|
||||
//
|
||||
// If va_space initialization fails we return the failure code and reset the
|
||||
// FD state back to UVM_FD_UNINITIALIZED to allow another initialization
|
||||
// attempt to be made. This is safe because other threads will have only had
|
||||
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
|
||||
// case.
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type == UVM_FD_UNINITIALIZED) {
|
||||
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
|
||||
if (status != NV_OK) {
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
|
||||
return status;
|
||||
}
|
||||
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
|
||||
}
|
||||
else if (old_fd_type == UVM_FD_VA_SPACE) {
|
||||
va_space = uvm_va_space_get(filp);
|
||||
|
||||
if (params->flags != va_space->initialization_flags)
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
else
|
||||
status = NV_OK;
|
||||
}
|
||||
else if (old_fd_type == UVM_FD_MM) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
|
||||
@@ -899,6 +967,7 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
return 0;
|
||||
|
||||
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_INITIALIZE, uvm_api_initialize);
|
||||
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_MM_INITIALIZE, uvm_api_mm_initialize);
|
||||
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS, uvm_api_pageable_mem_access);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS_ON_GPU, uvm_api_pageable_mem_access_on_gpu);
|
||||
@@ -978,16 +1047,9 @@ static const struct file_operations uvm_fops =
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
|
||||
{
|
||||
long ret;
|
||||
int write = 1;
|
||||
int force = 0;
|
||||
struct page *page;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@@ -998,7 +1060,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
// are not used because unload_state_buf may be a managed memory pointer and
|
||||
// therefore a locking assertion from the CPU fault handler could be fired.
|
||||
nv_mmap_read_lock(current->mm);
|
||||
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
|
||||
nv_mmap_read_unlock(current->mm);
|
||||
|
||||
if (ret < 0)
|
||||
@@ -1008,7 +1070,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
put_page(page);
|
||||
NV_UNPIN_USER_PAGE(page);
|
||||
status = NV_ERR_IN_USE;
|
||||
goto error;
|
||||
}
|
||||
@@ -1027,7 +1089,7 @@ static void uvm_test_unload_state_exit(void)
|
||||
{
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
kunmap(g_uvm_global.unload_state.page);
|
||||
put_page(g_uvm_global.unload_state.page);
|
||||
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
#ifndef _UVM_H_
|
||||
#define _UVM_H_
|
||||
|
||||
#define UVM_API_LATEST_REVISION 7
|
||||
#define UVM_API_LATEST_REVISION 8
|
||||
|
||||
#if !defined(UVM_API_REVISION)
|
||||
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
|
||||
@@ -211,12 +211,12 @@ NV_STATUS UvmDeinitialize(void);
|
||||
// UvmReopen
|
||||
//
|
||||
// Reinitializes the UVM driver after checking for minimal user-mode state.
|
||||
// Before calling this function, all GPUs must be unregistered with
|
||||
// Before calling this function, all GPUs must be unregistered with
|
||||
// UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree().
|
||||
// Note that it is not required to release VA ranges that were reserved with
|
||||
// UvmReserveVa().
|
||||
//
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// replaces it with a new open file with the same name.
|
||||
//
|
||||
// Arguments:
|
||||
@@ -410,6 +410,12 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
// location will have their range group association changed to
|
||||
// UVM_RANGE_GROUP_ID_NONE.
|
||||
//
|
||||
// If the Confidential Computing feature is enabled in the system, any VA
|
||||
// ranges allocated using UvmAllocSemaphorePool and owned by this GPU will be
|
||||
// unmapped from all GPUs and the CPU. UvmFree must still be called on those
|
||||
// ranges to reclaim the VA. See UvmAllocSemaphorePool to determine which GPU
|
||||
// is considered the owner.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unregister.
|
||||
@@ -1094,10 +1100,12 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
|
||||
// Creates a new mapping in the virtual address space of the process, populates
|
||||
// it at the specified preferred location, maps it on the provided list of
|
||||
// processors if feasible and associates the range with the given range group.
|
||||
// If the preferredLocationUuid is the UUID of the CPU, preferred location is
|
||||
// set to all CPU nodes allowed by the global and thread memory policies.
|
||||
//
|
||||
// This API is equivalent to the following code sequence:
|
||||
// UvmMemMap(base, length);
|
||||
// UvmSetPreferredLocation(base, length, preferredLocationUuid);
|
||||
// UvmSetPreferredLocation(base, length, preferredLocationUuid, -1);
|
||||
// for (i = 0; i < accessedByCount; i++) {
|
||||
// UvmSetAccessedBy(base, length, &accessedByUuids[i]);
|
||||
// }
|
||||
@@ -1262,6 +1270,12 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
//
|
||||
// The VA range can be unmapped and freed via a call to UvmFree.
|
||||
//
|
||||
// If the Confidential Computing feature is enabled in the system, at least one
|
||||
// GPU must be provided in the perGpuAttribs array. The first GPU in the array
|
||||
// is considered the owning GPU. If the owning GPU is unregistered via
|
||||
// UvmUnregisterGpu, this allocation will no longer be usable.
|
||||
// See UvmUnregisterGpu.
|
||||
//
|
||||
// Arguments:
|
||||
// base: (INPUT)
|
||||
// Base address of the virtual address range.
|
||||
@@ -1298,6 +1312,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
|
||||
// or caching is requested on more than one GPU.
|
||||
// The Confidential Computing feature is enabled and the perGpuAttribs
|
||||
// list is empty.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The current process is not the one which called UvmInitialize, and
|
||||
@@ -1444,7 +1460,7 @@ NV_STATUS UvmMigrate(void *base,
|
||||
NV_STATUS UvmMigrate(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
NvU32 preferredCpuMemoryNode);
|
||||
NvS32 preferredCpuMemoryNode);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -1537,7 +1553,7 @@ NV_STATUS UvmMigrateAsync(void *base,
|
||||
NV_STATUS UvmMigrateAsync(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
NvU32 preferredCpuMemoryNode,
|
||||
NvS32 preferredCpuMemoryNode,
|
||||
void *semaphoreAddress,
|
||||
NvU32 semaphorePayload);
|
||||
#endif
|
||||
@@ -1746,17 +1762,20 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// GPUs. The external allocation can be unmapped from a specific GPU using
|
||||
// UvmUnmapExternal or from all GPUs using UvmFree.
|
||||
//
|
||||
// The virtual address range specified by (base, length) must be aligned to the
|
||||
// allocation's physical page size and must fall within a VA range previously
|
||||
// created with UvmCreateExternalRange. A GPU VA space must have been registered
|
||||
// for each GPU in the list. The offset in the physical allocation at which the
|
||||
// allocation must be mapped should also be aligned to the allocation's physical
|
||||
// page size. The (base, length) range must lie within the largest possible
|
||||
// virtual address supported by the specified GPUs.
|
||||
// The virtual address range specified by (base, length) must fall within a VA
|
||||
// range previously created with UvmCreateExternalRange. A GPU VA space must
|
||||
// have been registered for each GPU in the list. The (base, length) range must
|
||||
// lie within the largest possible virtual address supported by the specified
|
||||
// GPUs.
|
||||
//
|
||||
// The page size used for the mapping is the largest supported page size less
|
||||
// than or equal to the alignments of base, length, offset, and the allocation
|
||||
// page size.
|
||||
//
|
||||
// If the range specified by (base, length) falls within any existing mappings,
|
||||
// the behavior is the same as if UvmUnmapExternal with the range specified by
|
||||
// (base, length) had been called first.
|
||||
// (base, length) had been called first, provided that base and length are
|
||||
// aligned to the page size used for the existing one.
|
||||
//
|
||||
// If the allocation resides in GPU memory, that GPU must have been registered
|
||||
// via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
|
||||
@@ -1838,8 +1857,9 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// - The requested address range does not fall entirely within an
|
||||
// existing external VA range created with a single call to
|
||||
// UvmCreateExternalRange.
|
||||
// - At least one of base and length is not aligned to the allocation's
|
||||
// physical page size.
|
||||
// - The mapping page size allowed by the alignments of base, length,
|
||||
// and offset is smaller than the minimum supported page size on the
|
||||
// GPU.
|
||||
// - base or base + length fall within an existing mapping but are not
|
||||
// aligned to that mapping's page size.
|
||||
//
|
||||
@@ -1848,8 +1868,7 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// address supported by one or more of the specified GPUs.
|
||||
//
|
||||
// NV_ERR_INVALID_OFFSET:
|
||||
// offset is not aligned to the allocation's physical page size or
|
||||
// offset+length exceeds the allocation size.
|
||||
// - offset+length exceeds the allocation size.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// One of the following occurred:
|
||||
@@ -2214,11 +2233,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// supported by the specified processor.
|
||||
//
|
||||
// The virtual address range specified by (base, length) must have been
|
||||
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
|
||||
// system-allocated pageable memory. If the input range is pageable memory and
|
||||
// at least one GPU in the system supports transparent access to pageable
|
||||
// memory, the behavior described below does not take effect and the preferred
|
||||
// location of the pages in the given range does not change.
|
||||
// allocated via a call to either UvmAlloc or UvmMemMap (managed memory), or be
|
||||
// supported system-allocated pageable memory. If the input range corresponds to
|
||||
// a file backed shared mapping and least one GPU in the system supports
|
||||
// transparent access to pageable memory, the behavior below is not guaranteed.
|
||||
//
|
||||
// If any pages in the VA range are associated with a range group that was made
|
||||
// non-migratable via UvmPreventMigrationRangeGroups, then those pages are
|
||||
@@ -2237,17 +2255,17 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// not cause a migration if a mapping for that page from that processor can be
|
||||
// established without migrating the page.
|
||||
//
|
||||
// When a page migrates away from its preferred location, the mapping on the
|
||||
// preferred location's processor is cleared so that the next access from that
|
||||
// processor will cause a fault and migrate the page back to its preferred
|
||||
// location. In other words, a page is mapped on the preferred location's
|
||||
// processor only if the page is in its preferred location. Thus, when the
|
||||
// preferred location changes, mappings to pages in the given range are removed
|
||||
// from the new preferred location if the pages are resident in a different
|
||||
// processor. Note that if the preferred location's processor is a GPU, then a
|
||||
// mapping from that GPU to a page in the VA range is only created if a GPU VA
|
||||
// space has been registered for that GPU and the page is in its preferred
|
||||
// location.
|
||||
// When a page that was allocated via either UvmAlloc or UvmMemMap migrates away
|
||||
// from its preferred location, the mapping on the preferred location's
|
||||
// processor is cleared so that the next access from that processor will cause a
|
||||
// fault and migrate the page back to its preferred location. In other words, a
|
||||
// page is mapped on the preferred location's processor only if the page is in
|
||||
// its preferred location. Thus, when the preferred location changes, mappings
|
||||
// to pages in the given range are removed from the new preferred location if
|
||||
// the pages are resident in a different processor. Note that if the preferred
|
||||
// location's processor is a GPU, then a mapping from that GPU to a page in the
|
||||
// VA range is only created if a GPU VA space has been registered for that GPU
|
||||
// and the page is in its preferred location.
|
||||
//
|
||||
// If read duplication has been enabled for any pages in this VA range and
|
||||
// UvmPreventMigrationRangeGroups has not been called on the range group that
|
||||
@@ -2260,7 +2278,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
//
|
||||
// If the preferred location processor is present in the accessed-by list of any
|
||||
// of the pages in this VA range, then the migration and mapping policies
|
||||
// associated with associated with the accessed-by list.
|
||||
// associated with this API override those associated with the accessed-by list.
|
||||
//
|
||||
// The state set by this API can be cleared either by calling
|
||||
// UvmUnsetPreferredLocation for the same VA range or by calling
|
||||
@@ -2281,35 +2299,66 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location.
|
||||
//
|
||||
// preferredCpuNumaNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
|
||||
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
|
||||
// allowed by the global and thread memory policies. This argument is
|
||||
// ignored if preferredLocationUuid refers to a GPU or the given virtual
|
||||
// address range corresponds to managed memory. If NUMA is not enabled,
|
||||
// only 0 or -1 is allowed.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// base and length are not properly aligned, or the range does not
|
||||
// represent a valid UVM allocation, or the range is pageable memory and
|
||||
// the system does not support accessing pageable memory, or the range
|
||||
// does not represent a supported Operating System allocation.
|
||||
// One of the following occurred:
|
||||
// - base and length are not properly aligned.
|
||||
// - The range does not represent a valid UVM allocation.
|
||||
// - The range is pageable memory and the system does not support
|
||||
// accessing pageable memory.
|
||||
// - The range does not represent a supported Operating System
|
||||
// allocation.
|
||||
//
|
||||
// NV_ERR_OUT_OF_RANGE:
|
||||
// The VA range exceeds the largest virtual address supported by the
|
||||
// specified processor.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// preferredLocationUuid is neither the UUID of the CPU nor the UUID of
|
||||
// a GPU that was registered by this process. Or at least one page in
|
||||
// VA range belongs to a non-migratable range group and the specified
|
||||
// UUID represents a fault-capable GPU. Or preferredLocationUuid is the
|
||||
// UUID of a non-fault-capable GPU and at least one page in the VA range
|
||||
// belongs to a non-migratable range group and another non-fault-capable
|
||||
// GPU is in the accessed-by list of the same page but P2P support
|
||||
// between both GPUs has not been enabled.
|
||||
// One of the following occurred:
|
||||
// - preferredLocationUuid is neither the UUID of the CPU nor the UUID
|
||||
// of a GPU that was registered by this process.
|
||||
// - At least one page in VA range belongs to a non-migratable range
|
||||
// group and the specified UUID represents a fault-capable GPU.
|
||||
// - preferredLocationUuid is the UUID of a non-fault-capable GPU and at
|
||||
// least one page in the VA range belongs to a non-migratable range
|
||||
// group and another non-fault-capable GPU is in the accessed-by list
|
||||
// of the same page but P2P support between both GPUs has not been
|
||||
// enabled.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// One of the following occured:
|
||||
// - preferredLocationUuid is the UUID of a CPU and preferredCpuNumaNode
|
||||
// refers to a registered GPU.
|
||||
// - preferredCpuNumaNode is invalid and preferredLocationUuid is the
|
||||
// UUID of the CPU.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The UVM file descriptor is associated with another process and the
|
||||
// input virtual range corresponds to system-allocated pageable memory.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(7)
|
||||
NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *preferredLocationUuid);
|
||||
#else
|
||||
NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *preferredLocationUuid,
|
||||
NvS32 preferredCpuNumaNode);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmUnsetPreferredLocation
|
||||
@@ -2323,10 +2372,9 @@ NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
//
|
||||
// The virtual address range specified by (base, length) must have been
|
||||
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
|
||||
// system-allocated pageable memory. If the input range is pageable memory and
|
||||
// at least one GPU in the system supports transparent access to pageable
|
||||
// memory, the behavior described below does not take effect and the preferred
|
||||
// location of the pages in the given range does not change.
|
||||
// system-allocated pageable memory. If the input range corresponds to a file
|
||||
// backed shared mapping and least one GPU in the system supports transparent
|
||||
// access to pageable memory, the behavior below is not guaranteed.
|
||||
//
|
||||
// If the VA range is associated with a non-migratable range group, then that
|
||||
// association is cleared. i.e. the pages in this VA range have their range
|
||||
@@ -2345,10 +2393,18 @@ NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// base and length are not properly aligned or the range does not
|
||||
// represent a valid UVM allocation, or the range is pageable memory and
|
||||
// the system does not support accessing pageable memory, or the range
|
||||
// does not represent a supported Operating System allocation.
|
||||
// One of the following occured:
|
||||
// - base and length are not properly aligned or the range does not
|
||||
// represent a valid UVM allocation.
|
||||
// - The range is pageable memory and the system does not support
|
||||
// accessing pageable memory.
|
||||
// - The range does not represent a supported Operating System
|
||||
// allocation.
|
||||
// - The range contains both managed and pageable memory allocations.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The UVM file descriptor is associated with another process and the
|
||||
// input virtual range corresponds to system-allocated pageable memory.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
@@ -2629,13 +2685,34 @@ NV_STATUS UvmDisableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// UVM was not initialized before calling this function.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmGetFileDescriptor(UvmFileDescriptor *returnedFd);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmGetMmFileDescriptor
|
||||
//
|
||||
// Returns the UVM file descriptor currently being used to keep the
|
||||
// memory management context valid. The data type of the returned file
|
||||
// descriptor is platform specific.
|
||||
//
|
||||
// If UvmInitialize has not yet been called, an error is returned.
|
||||
//
|
||||
// Arguments:
|
||||
// returnedFd: (OUTPUT)
|
||||
// A platform specific file descriptor.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// returnedFd is NULL.
|
||||
//
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// UVM was not initialized before calling this function.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// This file descriptor is not required on this platform.
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmGetMmFileDescriptor(UvmFileDescriptor *returnedFd);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmIs8Supported
|
||||
//
|
||||
|
||||
@@ -49,11 +49,13 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// A single top level PDE on Ada covers 128 TB and that's the minimum size
|
||||
// that can be used.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
// Not all units on Ada support 49-bit addressing, including those which
|
||||
|
||||
@@ -47,14 +47,16 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// A single top level PDE on Ampere covers 128 TB and that's the minimum
|
||||
// size that can be used.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->flat_vidmem_va_base = 136 * UVM_SIZE_1TB;
|
||||
parent_gpu->flat_sysmem_va_base = 256 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "clc7b5.h"
|
||||
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
|
||||
|
||||
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
@@ -112,7 +112,7 @@ NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
|
||||
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
NvU64 push_begin_gpu_va;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
@@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
return true;
|
||||
|
||||
if (uvm_channel_is_proxy(push->channel)) {
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
if (dst.is_virtual) {
|
||||
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
|
||||
return false;
|
||||
@@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
return false;
|
||||
}
|
||||
|
||||
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
|
||||
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
|
||||
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
|
||||
@@ -177,13 +180,19 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
// irrespective of the virtualization mode.
|
||||
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return;
|
||||
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
#include "clc56f.h"
|
||||
#include "clc076.h"
|
||||
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
@@ -82,7 +82,7 @@ bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#define __UVM_API_H__
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_ioctl.h"
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_lock.h"
|
||||
@@ -51,8 +52,10 @@
|
||||
\
|
||||
params.rmStatus = uvm_global_get_status(); \
|
||||
if (params.rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params.rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params.rmStatus == NV_OK)) \
|
||||
params.rmStatus = function_name(¶ms, filp); \
|
||||
} \
|
||||
@@ -88,8 +91,10 @@
|
||||
\
|
||||
params->rmStatus = uvm_global_get_status(); \
|
||||
if (params->rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params->rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params->rmStatus == NV_OK)) \
|
||||
params->rmStatus = function_name(params, filp); \
|
||||
} \
|
||||
@@ -196,21 +201,20 @@ static bool uvm_api_range_invalid_64k(NvU64 base, NvU64 length)
|
||||
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_64K);
|
||||
}
|
||||
|
||||
// Returns true if the interval [start, start + length -1] is entirely covered
|
||||
// by vmas.
|
||||
//
|
||||
// LOCKING: mm->mmap_lock must be held in at least read mode.
|
||||
bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 length);
|
||||
typedef enum
|
||||
{
|
||||
UVM_API_RANGE_TYPE_MANAGED,
|
||||
UVM_API_RANGE_TYPE_HMM,
|
||||
UVM_API_RANGE_TYPE_ATS,
|
||||
UVM_API_RANGE_TYPE_INVALID
|
||||
} uvm_api_range_type_t;
|
||||
|
||||
// Check that the interval [base, base + length) is fully covered by UVM
|
||||
// managed ranges (NV_OK is returned), or (if ATS is enabled and mm != NULL)
|
||||
// fully covered by valid vmas (NV_WARN_NOTHING_TO_DO is returned), or (if HMM
|
||||
// is enabled and mm != NULL) fully covered by valid vmas (NV_OK is returned).
|
||||
// Any other input results in a return status of NV_ERR_INVALID_ADDRESS.
|
||||
// If the interval [base, base + length) is fully covered by VMAs which all have
|
||||
// the same uvm_api_range_type_t, that range type is returned.
|
||||
//
|
||||
// LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
|
||||
// mm->mmap_lock must also be held in at least read mode.
|
||||
NV_STATUS uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
|
||||
uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
|
||||
|
||||
NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *filp);
|
||||
|
||||
@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
|
||||
|
||||
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_init_va_space(va_space);
|
||||
}
|
||||
@@ -57,6 +59,10 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
return uvm_ats_ibm_add_gpu(parent_gpu);
|
||||
}
|
||||
else if (UVM_ATS_SVA_SUPPORTED()) {
|
||||
if (g_uvm_global.ats.enabled)
|
||||
return uvm_ats_sva_add_gpu(parent_gpu);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
@@ -71,6 +77,10 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
uvm_ats_ibm_remove_gpu(parent_gpu);
|
||||
}
|
||||
else if (UVM_ATS_SVA_SUPPORTED()) {
|
||||
if (g_uvm_global.ats.enabled)
|
||||
uvm_ats_sva_remove_gpu(parent_gpu);
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
@@ -87,6 +97,8 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
status = uvm_ats_sva_bind_gpu(gpu_va_space);
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -100,6 +112,8 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_unbind_gpu(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
uvm_ats_sva_unbind_gpu(gpu_va_space);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
@@ -126,6 +140,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
|
||||
|
||||
if (status == NV_OK)
|
||||
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
|
||||
@@ -148,6 +164,8 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
|
||||
|
||||
@@ -28,8 +28,21 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "uvm_lock.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -37,10 +50,15 @@ typedef struct
|
||||
// indexed by gpu->id. This mask is protected by the VA space lock.
|
||||
uvm_processor_mask_t registered_gpu_va_spaces;
|
||||
|
||||
// Protects racing invalidates in the VA space while hmm_range_fault() is
|
||||
// being called in ats_compute_residency_mask().
|
||||
uvm_rw_semaphore_t lock;
|
||||
|
||||
union
|
||||
{
|
||||
uvm_ibm_va_space_t ibm;
|
||||
|
||||
uvm_sva_va_space_t sva;
|
||||
};
|
||||
} uvm_ats_va_space_t;
|
||||
|
||||
@@ -58,6 +76,7 @@ typedef struct
|
||||
{
|
||||
uvm_ibm_gpu_va_space_t ibm;
|
||||
|
||||
uvm_sva_gpu_va_space_t sva;
|
||||
};
|
||||
} uvm_ats_gpu_va_space_t;
|
||||
|
||||
@@ -90,6 +109,8 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
// LOCKING: mmap_lock must be lockable.
|
||||
// VA space lock must be lockable.
|
||||
// gpu_va_space->gpu must be retained.
|
||||
// mm must be retained with uvm_va_space_mm_retain() iff
|
||||
// UVM_ATS_SVA_SUPPORTED() is 1
|
||||
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
|
||||
|
||||
@@ -20,21 +20,33 @@
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_tools.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_access_type_t access_type)
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
NV_STATUS status;
|
||||
NvU64 start;
|
||||
NvU64 length;
|
||||
NvU64 user_space_start;
|
||||
NvU64 user_space_length;
|
||||
|
||||
// Request uvm_migrate_pageable() to touch the corresponding page after
|
||||
// population.
|
||||
@@ -43,17 +55,18 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// 2) guest physical -> host physical
|
||||
//
|
||||
// The overall ATS translation will fault if either of those translations is
|
||||
// invalid. The get_user_pages() call above handles translation #1, but not
|
||||
// #2. We don't know if we're running as a guest, but in case we are we can
|
||||
// force that translation to be valid by touching the guest physical address
|
||||
// from the CPU. If the translation is not valid then the access will cause
|
||||
// a hypervisor fault. Note that dma_map_page() can't establish mappings
|
||||
// used by GPU ATS SVA translations. GPU accesses to host physical addresses
|
||||
// obtained as a result of the address translation request uses the CPU
|
||||
// address space instead of the IOMMU address space since the translated
|
||||
// host physical address isn't necessarily an IOMMU address. The only way to
|
||||
// establish guest physical to host physical mapping in the CPU address
|
||||
// space is to touch the page from the CPU.
|
||||
// invalid. The pin_user_pages() call within uvm_migrate_pageable() call
|
||||
// below handles translation #1, but not #2. We don't know if we're running
|
||||
// as a guest, but in case we are we can force that translation to be valid
|
||||
// by touching the guest physical address from the CPU. If the translation
|
||||
// is not valid then the access will cause a hypervisor fault. Note that
|
||||
// dma_map_page() can't establish mappings used by GPU ATS SVA translations.
|
||||
// GPU accesses to host physical addresses obtained as a result of the
|
||||
// address translation request uses the CPU address space instead of the
|
||||
// IOMMU address space since the translated host physical address isn't
|
||||
// necessarily an IOMMU address. The only way to establish guest physical to
|
||||
// host physical mapping in the CPU address space is to touch the page from
|
||||
// the CPU.
|
||||
//
|
||||
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
|
||||
// VM_WRITE, meaning that the mappings are all granted write access on any
|
||||
@@ -64,23 +77,22 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = fault_addr,
|
||||
.length = PAGE_SIZE,
|
||||
.dst_id = gpu_va_space->gpu->parent->id,
|
||||
.dst_node_id = -1,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.user_space_start = &start,
|
||||
.user_space_length = &length,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.populate_on_cpu_alloc_failures = true,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
//
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
// populate and map the faulting region on the GPU. We want to do this only
|
||||
// on the first touch. That is, pages which are not already mapped. So, we
|
||||
@@ -95,110 +107,448 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *current_entry,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate)
|
||||
static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 addr,
|
||||
size_t size,
|
||||
uvm_fault_client_type_t client_type)
|
||||
{
|
||||
NvU64 gmmu_region_base;
|
||||
bool in_gmmu_region;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_fault_access_type_t service_access_type;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
else
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
|
||||
|
||||
if (!ats_invalidate->write_faults_in_batch) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
|
||||
ats_invalidate->write_faults_in_batch = true;
|
||||
}
|
||||
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
|
||||
}
|
||||
|
||||
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
int residency = uvm_gpu_numa_node(gpu);
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
unsigned short mode;
|
||||
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
|
||||
// It's safe to read vma_policy since the mmap_lock is held in at least read
|
||||
// mode in this path.
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
|
||||
if (!vma_policy)
|
||||
goto done;
|
||||
|
||||
mode = vma_policy->mode;
|
||||
|
||||
if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
|
||||
int home_node = NUMA_NO_NODE;
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
|
||||
if ((mode != MPOL_PREFERRED) && (vma_policy->home_node != NUMA_NO_NODE))
|
||||
home_node = vma_policy->home_node;
|
||||
#endif
|
||||
|
||||
// Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
|
||||
// in the list of preferred nodes, else prefer the closest_cpu_numa_node
|
||||
// to the GPU if closest_cpu_numa_node is in the list of preferred
|
||||
// nodes. Fallback to the faulting GPU if all else fails.
|
||||
if (home_node != NUMA_NO_NODE) {
|
||||
residency = home_node;
|
||||
}
|
||||
else if (!node_isset(residency, vma_policy->nodes)) {
|
||||
int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
|
||||
|
||||
if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
|
||||
residency = gpu->parent->closest_cpu_numa_node;
|
||||
else
|
||||
residency = first_node(vma_policy->nodes);
|
||||
}
|
||||
|
||||
if (!nodes_empty(vma_policy->nodes))
|
||||
ats_context->prefetch_state.has_preferred_location = true;
|
||||
}
|
||||
|
||||
// Update gpu if residency is not the faulting gpu.
|
||||
if (residency != uvm_gpu_numa_node(gpu))
|
||||
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
|
||||
|
||||
done:
|
||||
#else
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
#endif
|
||||
|
||||
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
|
||||
ats_context->residency_node = residency;
|
||||
}
|
||||
|
||||
static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
|
||||
{
|
||||
*start = max(vma->vm_start, (unsigned long) base);
|
||||
*end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
|
||||
{
|
||||
UVM_ASSERT(addr >= base);
|
||||
UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
|
||||
|
||||
return (addr - base) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
// start and end must be aligned to PAGE_SIZE and must fall within
|
||||
// [base, base + UVM_VA_BLOCK_SIZE]
|
||||
static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
|
||||
{
|
||||
// base can be greater than, less than or equal to the start of a VMA.
|
||||
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
|
||||
|
||||
UVM_ASSERT(start < end);
|
||||
UVM_ASSERT(PAGE_ALIGNED(start));
|
||||
UVM_ASSERT(PAGE_ALIGNED(end));
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
|
||||
return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
|
||||
{
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
|
||||
get_range_in_vma(vma, base, &start, &end);
|
||||
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
|
||||
uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
|
||||
|
||||
// The following write lock protects against concurrent invalidates while
|
||||
// hmm_range_fault() is being called in ats_compute_residency_mask().
|
||||
uvm_down_write(&va_space->ats.lock);
|
||||
|
||||
mmu_interval_set_seq(mni, cur_seq);
|
||||
|
||||
uvm_up_write(&va_space->ats.lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
|
||||
const struct mmu_notifier_range *range,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
|
||||
}
|
||||
|
||||
static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
|
||||
{
|
||||
.invalidate = uvm_ats_invalidate_notifier_entry,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
|
||||
ats_context->prefetch_state.first_touch = true;
|
||||
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
|
||||
get_range_in_vma(vma, base, &start, &end);
|
||||
|
||||
vma_region = uvm_ats_region_from_start_end(start, end);
|
||||
|
||||
range.notifier = &ats_context->prefetch_state.notifier;
|
||||
range.start = start;
|
||||
range.end = end;
|
||||
range.hmm_pfns = ats_context->prefetch_state.pfns;
|
||||
range.default_flags = 0;
|
||||
range.pfn_flags_mask = 0;
|
||||
range.dev_private_owner = NULL;
|
||||
|
||||
ats_context->prefetch_state.va_space = va_space;
|
||||
|
||||
// mmu_interval_notifier_insert() will try to acquire mmap_lock for write
|
||||
// and will deadlock since mmap_lock is already held for read in this path.
|
||||
// This is prevented by calling __mmu_notifier_register() during va_space
|
||||
// creation. See the comment in uvm_mmu_notifier_register() for more
|
||||
// details.
|
||||
ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
|
||||
if (ret)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
while (true) {
|
||||
range.notifier_seq = mmu_interval_read_begin(range.notifier);
|
||||
ret = hmm_range_fault(&range);
|
||||
if (ret == -EBUSY)
|
||||
continue;
|
||||
if (ret) {
|
||||
status = errno_to_nv_status(ret);
|
||||
UVM_ASSERT(status != NV_OK);
|
||||
break;
|
||||
}
|
||||
|
||||
uvm_down_read(&va_space->ats.lock);
|
||||
|
||||
// Pages may have been freed or re-allocated after hmm_range_fault() is
|
||||
// called. So the PTE might point to a different page or nothing. In the
|
||||
// memory hot-unplug case it is not safe to call page_to_nid() on the
|
||||
// page as the struct page itself may have been freed. To protect
|
||||
// against these cases, uvm_ats_invalidate_entry() blocks on va_space
|
||||
// ATS write lock for concurrent invalidates since va_space ATS lock is
|
||||
// held for read in this path.
|
||||
if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
|
||||
break;
|
||||
|
||||
uvm_up_read(&va_space->ats.lock);
|
||||
}
|
||||
|
||||
if (status == NV_OK) {
|
||||
for_each_va_block_page_in_region(page_index, vma_region) {
|
||||
unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
|
||||
|
||||
if (pfn & HMM_PFN_VALID) {
|
||||
struct page *page = hmm_pfn_to_page(pfn);
|
||||
|
||||
if (page_to_nid(page) == ats_context->residency_node)
|
||||
uvm_page_mask_set(residency_mask, page_index);
|
||||
|
||||
ats_context->prefetch_state.first_touch = false;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_up_read(&va_space->ats.lock);
|
||||
}
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#endif
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_page_mask_t *faulted_mask)
|
||||
{
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return;
|
||||
|
||||
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
|
||||
faulted_mask,
|
||||
uvm_va_block_region_from_mask(NULL, faulted_mask),
|
||||
max_prefetch_region,
|
||||
residency_mask,
|
||||
bitmap_tree,
|
||||
prefetch_mask);
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
ats_context->prefetch_state.first_touch &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
|
||||
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_region_t subregion;
|
||||
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
UVM_ASSERT(g_uvm_global.ats.enabled);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
UVM_ASSERT(current_entry->fault_access_type ==
|
||||
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
|
||||
uvm_page_mask_zero(faults_serviced_mask);
|
||||
uvm_page_mask_zero(reads_serviced_mask);
|
||||
|
||||
service_access_type = current_entry->fault_access_type;
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
return status;
|
||||
|
||||
// ATS lookups are disabled on all addresses within the same
|
||||
// UVM_GMMU_ATS_GRANULARITY as existing GMMU mappings (see documentation in
|
||||
// uvm_mmu.h). User mode is supposed to reserve VAs as appropriate to
|
||||
// prevent any system memory allocations from falling within the NO_ATS
|
||||
// range of other GMMU mappings, so this shouldn't happen during normal
|
||||
// operation. However, since this scenario may lead to infinite fault loops,
|
||||
// we handle it by canceling the fault.
|
||||
//
|
||||
// TODO: Bug 2103669: Remove redundant VA range lookups
|
||||
gmmu_region_base = UVM_ALIGN_DOWN(current_entry->fault_address, UVM_GMMU_ATS_GRANULARITY);
|
||||
in_gmmu_region = !uvm_va_space_range_empty(current_entry->va_space,
|
||||
gmmu_region_base,
|
||||
gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1);
|
||||
if (in_gmmu_region) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
else {
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
|
||||
if (!(vma->vm_flags & VM_WRITE)) {
|
||||
// If VMA doesn't have write permissions, all write faults are fatal.
|
||||
// Try servicing such faults for read iff they are also present in
|
||||
// read_fault_mask. This is because for replayable faults, if there are
|
||||
// pending read accesses on the same page, we have to service them
|
||||
// before we can cancel the write/atomic faults. So we try with read
|
||||
// fault access type even though these write faults are fatal.
|
||||
if (ats_context->client_type == UVM_FAULT_CLIENT_TYPE_GPC)
|
||||
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
|
||||
else
|
||||
uvm_page_mask_zero(write_fault_mask);
|
||||
}
|
||||
|
||||
// Do not flag prefetch faults as fatal unless something fatal happened
|
||||
if (status == NV_ERR_INVALID_ADDRESS) {
|
||||
if (current_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH) {
|
||||
current_entry->is_fatal = true;
|
||||
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// Compute cancel mode for replayable faults
|
||||
if (current_entry->is_replayable) {
|
||||
if (service_access_type == UVM_FAULT_ACCESS_TYPE_READ || in_gmmu_region)
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
else
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC;
|
||||
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
|
||||
|
||||
// If there are pending read accesses on the same page, we have to
|
||||
// service them before we can cancel the write/atomic faults. So we
|
||||
// retry with read fault access type.
|
||||
if (!in_gmmu_region &&
|
||||
current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ &&
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
UVM_FAULT_ACCESS_TYPE_READ);
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
|
||||
UVM_FAULT_ACCESS_TYPE_WRITE :
|
||||
UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
// If read accesses are also invalid, cancel the fault. If a
|
||||
// different error code is returned, exit
|
||||
if (status == NV_ERR_INVALID_ADDRESS)
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
else if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
}
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (vma->vm_flags & VM_WRITE) {
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
|
||||
// The Linux kernel never invalidates TLB entries on mapping
|
||||
// permission upgrade. This is a problem if the GPU has cached
|
||||
// entries with the old permission. The GPU will re-fetch the entry
|
||||
// if the PTE is invalid and page size is not 4K (this is the case
|
||||
// on P9). However, if a page gets upgraded from R/O to R/W and GPU
|
||||
// has the PTEs cached with R/O permissions we will enter an
|
||||
// infinite loop because we just forward the fault to the Linux
|
||||
// kernel and it will see that the permissions in the page table are
|
||||
// correct. Therefore, we flush TLB entries on ATS write faults.
|
||||
flush_tlb_write_faults(gpu_va_space, start, length, client_type);
|
||||
}
|
||||
else {
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
|
||||
}
|
||||
|
||||
// Do not fail overall fault servicing due to logical errors
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
// The Linux kernel never invalidates TLB entries on mapping permission
|
||||
// upgrade. This is a problem if the GPU has cached entries with the old
|
||||
// permission. The GPU will re-fetch the entry if the PTE is invalid and
|
||||
// page size is not 4K (this is the case on P9). However, if a page gets
|
||||
// upgraded from R/O to R/W and GPU has the PTEs cached with R/O
|
||||
// permissions we will enter an infinite loop because we just forward the
|
||||
// fault to the Linux kernel and it will see that the permissions in the
|
||||
// page table are correct. Therefore, we flush TLB entries on ATS write
|
||||
// faults.
|
||||
if (!current_entry->is_fatal && current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ) {
|
||||
if (!ats_invalidate->write_faults_in_batch) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
|
||||
ats_invalidate->write_faults_in_batch = true;
|
||||
}
|
||||
// Remove write faults from read_fault_mask
|
||||
uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);
|
||||
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch,
|
||||
current_entry->fault_address,
|
||||
PAGE_SIZE,
|
||||
PAGE_SIZE,
|
||||
UVM_MEMBAR_NONE);
|
||||
for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next)
|
||||
{
|
||||
uvm_va_range_t *prev;
|
||||
NvU64 gmmu_region_base = UVM_ALIGN_DOWN(address, UVM_GMMU_ATS_GRANULARITY);
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
if (next) {
|
||||
if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
|
||||
return true;
|
||||
|
||||
prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
|
||||
}
|
||||
else {
|
||||
// No VA range exists after address, so check the last VA range in the
|
||||
// tree.
|
||||
prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
|
||||
}
|
||||
|
||||
return prev && (prev->node.end >= gmmu_region_base);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate,
|
||||
uvm_tracker_t *out_tracker)
|
||||
@@ -230,3 +580,4 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,10 +25,31 @@
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *current_entry,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate);
|
||||
// Service ATS faults in the range (base, base + UVM_VA_BLOCK_SIZE) with service
|
||||
// type for individual pages in the range requested by page masks set in
|
||||
// ats_context->read_fault_mask/write_fault_mask. base must be aligned to
|
||||
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
|
||||
// addresses fall completely within the VMA. The caller is also responsible for
|
||||
// ensuring that the faulting addresses don't overlap a GMMU region. (See
|
||||
// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
|
||||
// any errors returned by this function (fault cancellations etc.).
|
||||
//
|
||||
// Returns the fault service status in ats_context->faults_serviced_mask. In
|
||||
// addition, ats_context->reads_serviced_mask returns whether read servicing
|
||||
// worked on write faults iff the read service was also requested in the
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// fault servicing failures.
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Return whether there are any VA ranges (and thus GMMU mappings) within the
|
||||
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
|
||||
|
||||
// This function performs pending TLB invalidations for ATS and clears the
|
||||
// ats_invalidate->write_faults_in_batch flag
|
||||
|
||||
156
kernel-open/nvidia-uvm/uvm_ats_sva.c
Normal file
156
kernel-open/nvidia-uvm/uvm_ats_sva.c
Normal file
@@ -0,0 +1,156 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#if UVM_ATS_SVA_SUPPORTED()
|
||||
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
|
||||
// reference required for the iommu_sva_bind_device() call. This header is not
|
||||
// present in all the supported versions. Instead of adding a conftest just for
|
||||
// this header file, use UVM_ATS_SVA_SUPPORTED().
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
// iommu_sva_bind_device() removed drvdata paramter with commit
|
||||
// 942fd5435dccb273f90176b046ae6bbba60cfbd8 (10/31/2022).
|
||||
#if defined(NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG)
|
||||
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm, NULL)
|
||||
#else
|
||||
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
|
||||
#endif
|
||||
|
||||
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
|
||||
return errno_to_nv_status(ret);
|
||||
}
|
||||
|
||||
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
struct iommu_sva *iommu_handle;
|
||||
struct pci_dev *pci_dev = gpu_va_space->gpu->parent->pci_dev;
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
|
||||
struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
|
||||
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
|
||||
UVM_ASSERT(mm);
|
||||
|
||||
// The mmput() below may trigger the kernel's mm teardown with exit_mmap()
|
||||
// and uvm_va_space_mm_shutdown() and uvm_vm_close_managed() in that path
|
||||
// will try to grab the va_space lock and deadlock if va_space was already
|
||||
// locked.
|
||||
uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
|
||||
|
||||
// iommu_sva_bind_device() requires the mm reference to be acquired. Since
|
||||
// the mm is already retained, mm is still valid but may be inactive since
|
||||
// mm_users can still be zero since UVM doesn't use mm_users and maintains a
|
||||
// separate refcount (retained_count) for the mm in va_space_mm. See the
|
||||
// block comment in va_space_mm.c for more details. So, return an error if
|
||||
// mm_users is zero.
|
||||
if (!mmget_not_zero(mm))
|
||||
return NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
|
||||
// Multiple calls for the {same pci_dev, mm} pair are refcounted by the ARM
|
||||
// SMMU Layer.
|
||||
iommu_handle = UVM_IOMMU_SVA_BIND_DEVICE(&pci_dev->dev, mm);
|
||||
if (IS_ERR(iommu_handle)) {
|
||||
status = errno_to_nv_status(PTR_ERR(iommu_handle));
|
||||
goto out;
|
||||
}
|
||||
|
||||
// If this is not the first bind of the gpu in the mm, then the previously
|
||||
// stored iommu_handle in the gpu_va_space must match the handle returned by
|
||||
// iommu_sva_bind_device().
|
||||
if (sva_gpu_va_space->iommu_handle) {
|
||||
UVM_ASSERT(sva_gpu_va_space->iommu_handle == iommu_handle);
|
||||
nv_kref_get(&sva_gpu_va_space->kref);
|
||||
}
|
||||
else {
|
||||
sva_gpu_va_space->iommu_handle = iommu_handle;
|
||||
nv_kref_init(&sva_gpu_va_space->kref);
|
||||
}
|
||||
|
||||
out:
|
||||
mmput(mm);
|
||||
return status;
|
||||
}
|
||||
|
||||
static void uvm_sva_reset_iommu_handle(nv_kref_t *nv_kref)
|
||||
{
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = container_of(nv_kref, uvm_sva_gpu_va_space_t, kref);
|
||||
sva_gpu_va_space->iommu_handle = NULL;
|
||||
}
|
||||
|
||||
void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
|
||||
|
||||
// ARM SMMU layer decrements the refcount for the {pci_dev, mm} pair.
|
||||
// The actual unbind happens only when the refcount reaches zero.
|
||||
if (sva_gpu_va_space->iommu_handle) {
|
||||
iommu_sva_unbind_device(sva_gpu_va_space->iommu_handle);
|
||||
nv_kref_put(&sva_gpu_va_space->kref, uvm_sva_reset_iommu_handle);
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
NvU32 pasid;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
|
||||
|
||||
// A successful iommu_sva_bind_device() should have preceded this call.
|
||||
UVM_ASSERT(sva_gpu_va_space->iommu_handle);
|
||||
|
||||
pasid = iommu_sva_get_pasid(sva_gpu_va_space->iommu_handle);
|
||||
if (pasid == IOMMU_PASID_INVALID)
|
||||
status = errno_to_nv_status(ENODEV);
|
||||
else
|
||||
gpu_va_space->ats.pasid = pasid;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
gpu_va_space->ats.pasid = -1U;
|
||||
}
|
||||
|
||||
#endif // UVM_ATS_SVA_SUPPORTED()
|
||||
112
kernel-open/nvidia-uvm/uvm_ats_sva.h
Normal file
112
kernel-open/nvidia-uvm/uvm_ats_sva.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __UVM_ATS_SVA_H__
|
||||
#define __UVM_ATS_SVA_H__
|
||||
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_forward_decl.h"
|
||||
|
||||
#include <linux/iommu.h>
|
||||
|
||||
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
|
||||
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
|
||||
// conftest-able. We instead look for the presence of ioasid_get() or
|
||||
// mm_pasid_set(). ioasid_get() was added in the same patch series as
|
||||
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_set() was added in the
|
||||
// same patch as the removal of ioasid_get(). We assume the presence of
|
||||
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
|
||||
// present.
|
||||
//
|
||||
// arm_smmu_sva_bind() was added with commit
|
||||
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
|
||||
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
|
||||
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
|
||||
// and added mm_pasid_set().
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
|
||||
#define UVM_ATS_SVA_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_SVA_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int placeholder;
|
||||
} uvm_sva_va_space_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Reference count for the iommu_handle
|
||||
nv_kref_t kref;
|
||||
struct iommu_sva *iommu_handle;
|
||||
} uvm_sva_gpu_va_space_t;
|
||||
|
||||
#if UVM_ATS_SVA_SUPPORTED()
|
||||
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// LOCKING: mmap_lock must be lockable
|
||||
// VA space lock must not be held.
|
||||
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// LOCKING: VA space lock must not be held.
|
||||
void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// LOCKING: None
|
||||
NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// LOCKING: None
|
||||
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
|
||||
#else
|
||||
static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
|
||||
}
|
||||
#endif // UVM_ATS_SVA_SUPPORTED
|
||||
|
||||
#endif // __UVM_ATS_SVA_H__
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -24,12 +24,14 @@
|
||||
#include "uvm_channel.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_tracker.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_rm_mem.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
#define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
|
||||
#define CE_TEST_MEM_END_SIZE 32
|
||||
@@ -52,6 +54,11 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
uvm_push_t push;
|
||||
bool is_proxy;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
|
||||
@@ -66,7 +73,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
is_proxy = uvm_channel_is_proxy(push.channel);
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy);
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy).address;
|
||||
|
||||
// All of the following CE transfers are done from a single (L)CE and
|
||||
// disabling pipelining is enough to order them when needed. Only push_end
|
||||
@@ -74,7 +81,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
|
||||
// Initialize to a bad value
|
||||
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;
|
||||
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
@@ -83,7 +90,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
|
||||
// Set the first buffer to 1
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy);
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy).address;
|
||||
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);
|
||||
|
||||
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
|
||||
@@ -91,9 +98,9 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
if (dst == CE_TEST_MEM_COUNT)
|
||||
dst_va = host_mem_gpu_va;
|
||||
else
|
||||
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy);
|
||||
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy).address;
|
||||
|
||||
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
|
||||
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;
|
||||
|
||||
// The first memcpy needs to be non-pipelined as otherwise the previous
|
||||
// memset/memcpy to the source may not be done yet.
|
||||
@@ -167,6 +174,11 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
uvm_push_t push;
|
||||
NvU32 value;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
|
||||
@@ -175,7 +187,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel));
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel)).address;
|
||||
|
||||
for (i = 0; i < REDUCTIONS; ++i) {
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
@@ -333,6 +345,16 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
// If physical accesses aren't supported, silently convert to virtual to
|
||||
// test the flat mapping.
|
||||
TEST_CHECK_RET(gpu_verif_addr.is_virtual);
|
||||
|
||||
if (!src.is_virtual)
|
||||
src = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(src.aperture, src.address));
|
||||
|
||||
if (!dst.is_virtual)
|
||||
dst = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(dst.aperture, dst.address));
|
||||
|
||||
// Memset src with the appropriate element size, then memcpy to dst and from
|
||||
// dst to the verif location (physical sysmem).
|
||||
|
||||
@@ -374,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
|
||||
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
bool is_proxy_va_space;
|
||||
bool is_proxy_va_space = false;
|
||||
uvm_gpu_address_t gpu_verif_addr;
|
||||
void *cpu_verif_addr;
|
||||
uvm_mem_t *verif_mem = NULL;
|
||||
@@ -382,17 +404,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
uvm_mem_t *gpu_uvm_mem = NULL;
|
||||
uvm_rm_mem_t *sys_rm_mem = NULL;
|
||||
uvm_rm_mem_t *gpu_rm_mem = NULL;
|
||||
uvm_gpu_address_t gpu_addresses[4];
|
||||
NvU64 gpu_va;
|
||||
size_t size;
|
||||
uvm_gpu_address_t gpu_addresses[4] = {0};
|
||||
size_t size = gpu->big_page.internal_size;
|
||||
static const size_t element_sizes[] = {1, 4, 8};
|
||||
const size_t iterations = 4;
|
||||
size_t i, j, k, s;
|
||||
uvm_mem_alloc_params_t mem_params = {0};
|
||||
|
||||
size = gpu->big_page.internal_size;
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
|
||||
else
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
|
||||
|
||||
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
|
||||
@@ -410,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
}
|
||||
}
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
|
||||
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
for (i = 0; i < iterations; ++i) {
|
||||
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
|
||||
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
|
||||
gpu_addresses[0],
|
||||
gpu_addresses[0],
|
||||
size,
|
||||
element_sizes[s],
|
||||
gpu_verif_addr,
|
||||
cpu_verif_addr,
|
||||
i),
|
||||
done);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Because gpu_verif_addr is in sysmem, when the Confidential
|
||||
// Computing feature is enabled, only the previous cases are valid.
|
||||
// TODO: Bug 3839176: the test partially waived on Confidential
|
||||
// Computing because it assumes that GPU can access system memory
|
||||
// without using encryption.
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Using a page size equal to the allocation size ensures that the UVM
|
||||
// memories about to be allocated are physically contiguous. And since the
|
||||
// size is a valid GPU page size, the memories can be virtually mapped on
|
||||
@@ -421,23 +471,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
// Physical address in sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
|
||||
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
|
||||
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
|
||||
|
||||
// Physical address in vidmem
|
||||
mem_params.backing_gpu = gpu;
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
|
||||
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
|
||||
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by vidmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
|
||||
is_proxy_va_space = false;
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
|
||||
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
|
||||
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
|
||||
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);
|
||||
|
||||
for (i = 0; i < iterations; ++i) {
|
||||
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
|
||||
@@ -513,6 +557,11 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
|
||||
// Semaphore reduction needs 1 word (4 bytes).
|
||||
const size_t size = sizeof(NvU32);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
@@ -560,6 +609,11 @@ static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
|
||||
// Semaphore release needs 1 word (4 bytes).
|
||||
const size_t size = sizeof(NvU32);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
@@ -609,6 +663,11 @@ static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
|
||||
// The semaphore is 4 words long (16 bytes).
|
||||
const size_t size = 16;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
@@ -645,6 +704,517 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool mem_match(uvm_mem_t *mem1, uvm_mem_t *mem2, size_t size)
|
||||
{
|
||||
void *mem1_addr;
|
||||
void *mem2_addr;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem1));
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem2));
|
||||
UVM_ASSERT(mem1->size >= size);
|
||||
UVM_ASSERT(mem2->size >= size);
|
||||
|
||||
mem1_addr = uvm_mem_get_cpu_addr_kernel(mem1);
|
||||
mem2_addr = uvm_mem_get_cpu_addr_kernel(mem2);
|
||||
|
||||
return !memcmp(mem1_addr, mem2_addr, size);
|
||||
}
|
||||
|
||||
static NV_STATUS zero_vidmem(uvm_mem_t *mem)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_address_t gpu_address;
|
||||
uvm_gpu_t *gpu = mem->backing_gpu;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
|
||||
|
||||
gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
|
||||
gpu->parent->ce_hal->memset_1(&push, gpu_address, 0, mem->size);
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void write_range_cpu(uvm_mem_t *mem, NvU64 base_val)
|
||||
{
|
||||
NvU64 *mem_cpu_va;
|
||||
unsigned i;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem));
|
||||
UVM_ASSERT(IS_ALIGNED(mem->size, sizeof(*mem_cpu_va)));
|
||||
|
||||
mem_cpu_va = (NvU64 *) uvm_mem_get_cpu_addr_kernel(mem);
|
||||
|
||||
for (i = 0; i < (mem->size / sizeof(*mem_cpu_va)); i++)
|
||||
mem_cpu_va[i] = base_val++;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(mem);
|
||||
|
||||
*mem = NULL;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err:
|
||||
uvm_mem_free(*mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_sysmem_unprotected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(mem);
|
||||
|
||||
*mem = NULL;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
|
||||
memset(uvm_mem_get_cpu_addr_kernel(*mem), 0, (*mem)->size);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err:
|
||||
uvm_mem_free(*mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static void cpu_encrypt(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
size_t offset = 0;
|
||||
char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
|
||||
while (offset < size) {
|
||||
uvm_conf_computing_cpu_encrypt(channel, dst_cipher, src_plain, NULL, copy_size, auth_tag_buffer);
|
||||
|
||||
offset += copy_size;
|
||||
dst_cipher += copy_size;
|
||||
src_plain += copy_size;
|
||||
auth_tag_buffer += UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void cpu_acquire_encryption_ivs(uvm_channel_t *channel,
|
||||
size_t size,
|
||||
NvU32 copy_size,
|
||||
UvmCslIv *ivs)
|
||||
{
|
||||
size_t offset = 0;
|
||||
int i = 0;
|
||||
|
||||
for (; offset < size; offset += copy_size)
|
||||
uvm_conf_computing_acquire_encryption_iv(channel, &ivs[i++]);
|
||||
}
|
||||
|
||||
static void cpu_encrypt_rev(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size,
|
||||
UvmCslIv *encrypt_iv)
|
||||
{
|
||||
char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
int i;
|
||||
|
||||
// CPU encrypt order is the opposite of the GPU decrypt order
|
||||
for (i = (size / copy_size) - 1; i >= 0; i--) {
|
||||
uvm_conf_computing_cpu_encrypt(channel,
|
||||
dst_cipher + i * copy_size,
|
||||
src_plain + i * copy_size,
|
||||
encrypt_iv + i,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
size_t i;
|
||||
char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
|
||||
for (i = 0; i < size / copy_size; i++) {
|
||||
TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
int i;
|
||||
char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
|
||||
UVM_ASSERT((size / copy_size) <= INT_MAX);
|
||||
|
||||
// CPU decrypt order is the opposite of the GPU decrypt order
|
||||
for (i = (size / copy_size) - 1; i >= 0; i--) {
|
||||
TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// GPU address to use as source or destination in CE decrypt/encrypt operations.
|
||||
// If the uvm_mem backing storage is contiguous in the [offset, offset + size)
|
||||
// interval, the physical address gets priority over the virtual counterpart.
|
||||
static uvm_gpu_address_t gpu_address(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU32 size)
|
||||
{
|
||||
uvm_gpu_address_t gpu_virtual_address;
|
||||
|
||||
if (uvm_mem_is_physically_contiguous(mem, offset, size))
|
||||
return uvm_mem_gpu_address_physical(mem, gpu, offset, size);
|
||||
|
||||
gpu_virtual_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
|
||||
gpu_virtual_address.address += offset;
|
||||
|
||||
return gpu_virtual_address;
|
||||
}
|
||||
|
||||
// Automatically get the correct address for the authentication tag. The
|
||||
// addressing mode of the tag should match that of the reference address
|
||||
// (destination pointer for GPU encrypt, source pointer for GPU encrypt)
|
||||
static uvm_gpu_address_t auth_tag_gpu_address(uvm_mem_t *auth_tag_mem,
|
||||
uvm_gpu_t *gpu,
|
||||
size_t offset,
|
||||
uvm_gpu_address_t reference)
|
||||
{
|
||||
uvm_gpu_address_t auth_tag_gpu_address;
|
||||
|
||||
if (!reference.is_virtual)
|
||||
return uvm_mem_gpu_address_physical(auth_tag_mem, gpu, offset, UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);
|
||||
auth_tag_gpu_address.address += offset;
|
||||
|
||||
return auth_tag_gpu_address;
|
||||
}
|
||||
|
||||
// Note: no membar is issued in any of the GPU transfers (encryptions)
|
||||
static void gpu_encrypt(uvm_push_t *push,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
UvmCslIv *decrypt_iv,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
size_t i;
|
||||
size_t num_iterations = size / copy_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
for (i = 0; i < num_iterations; i++) {
|
||||
uvm_gpu_address_t dst_cipher = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
|
||||
uvm_gpu_address_t src_plain = gpu_address(src_mem, gpu, i * copy_size, copy_size);
|
||||
uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
|
||||
gpu,
|
||||
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
dst_cipher);
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
|
||||
|
||||
if (i > 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
|
||||
gpu->parent->ce_hal->encrypt(push, dst_cipher, src_plain, copy_size, auth_tag);
|
||||
decrypt_iv++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no membar is issued in any of the GPU transfers (decryptions)
|
||||
static void gpu_decrypt(uvm_push_t *push,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
size_t i;
|
||||
size_t num_iterations = size / copy_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
for (i = 0; i < num_iterations; i++) {
|
||||
uvm_gpu_address_t dst_plain = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
|
||||
uvm_gpu_address_t src_cipher = gpu_address(src_mem, gpu, i * copy_size, copy_size);
|
||||
uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
|
||||
gpu,
|
||||
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
src_cipher);
|
||||
|
||||
if (i > 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
|
||||
gpu->parent->ce_hal->decrypt(push, dst_plain, src_cipher, copy_size, auth_tag);
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
uvm_channel_type_t decrypt_channel_type,
|
||||
uvm_channel_type_t encrypt_channel_type,
|
||||
size_t size,
|
||||
NvU32 copy_size,
|
||||
bool decrypt_in_order,
|
||||
bool encrypt_in_order)
|
||||
{
|
||||
uvm_push_t push;
|
||||
NvU64 init_value;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_mem_t *src_plain = NULL;
|
||||
uvm_mem_t *src_cipher = NULL;
|
||||
uvm_mem_t *dst_cipher = NULL;
|
||||
uvm_mem_t *dst_plain_gpu = NULL;
|
||||
uvm_mem_t *dst_plain = NULL;
|
||||
uvm_mem_t *auth_tag_mem = NULL;
|
||||
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
|
||||
UvmCslIv *decrypt_iv = NULL;
|
||||
UvmCslIv *encrypt_iv = NULL;
|
||||
uvm_tracker_t tracker;
|
||||
size_t src_plain_size;
|
||||
|
||||
TEST_CHECK_RET(copy_size <= size);
|
||||
TEST_CHECK_RET(IS_ALIGNED(size, copy_size));
|
||||
|
||||
uvm_tracker_init(&tracker);
|
||||
|
||||
decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
|
||||
if (!decrypt_iv) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
encrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
|
||||
if (!encrypt_iv) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_cipher, size), out);
|
||||
TEST_NV_CHECK_GOTO(alloc_vidmem_protected(gpu, &dst_plain_gpu, size), out);
|
||||
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_cipher, size), out);
|
||||
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_plain, size), out);
|
||||
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &auth_tag_mem, auth_tag_buffer_size), out);
|
||||
|
||||
// The plaintext CPU buffer size should fit the initialization value
|
||||
src_plain_size = UVM_ALIGN_UP(size, sizeof(init_value));
|
||||
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_plain, src_plain_size), out);
|
||||
|
||||
// Initialize the plaintext CPU buffer using a value that uniquely
|
||||
// identifies the given inputs
|
||||
TEST_CHECK_GOTO((((NvU64) size) < (1ULL << 63)), out);
|
||||
init_value = ((NvU64) decrypt_in_order << 63) | ((NvU64) size) | ((NvU64) copy_size);
|
||||
write_range_cpu(src_plain, init_value);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
|
||||
decrypt_channel_type,
|
||||
&push,
|
||||
"CPU > GPU decrypt"),
|
||||
out);
|
||||
|
||||
// CPU (decrypted) > CPU (encrypted), using CPU, if in-order
|
||||
// acquire IVs if not in-order
|
||||
if (encrypt_in_order)
|
||||
cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
|
||||
else
|
||||
cpu_acquire_encryption_ivs(push.channel, size, copy_size, encrypt_iv);
|
||||
|
||||
// CPU (encrypted) > GPU (decrypted), using GPU
|
||||
gpu_decrypt(&push, dst_plain_gpu, src_cipher, auth_tag_mem, size, copy_size);
|
||||
|
||||
// Use acquired IVs to encrypt in reverse order
|
||||
if (!encrypt_in_order)
|
||||
cpu_encrypt_rev(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size, encrypt_iv);
|
||||
|
||||
uvm_push_end(&push);
|
||||
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);
|
||||
|
||||
// GPU (decrypted) > CPU (encrypted), using GPU
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
|
||||
encrypt_channel_type,
|
||||
&tracker,
|
||||
&push,
|
||||
"GPU > CPU encrypt"),
|
||||
out);
|
||||
|
||||
gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
|
||||
|
||||
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
|
||||
|
||||
TEST_CHECK_GOTO(!mem_match(dst_cipher, src_plain, size), out);
|
||||
|
||||
// CPU (encrypted) > CPU (decrypted), using CPU
|
||||
if (decrypt_in_order) {
|
||||
TEST_NV_CHECK_GOTO(cpu_decrypt_in_order(push.channel,
|
||||
dst_plain,
|
||||
dst_cipher,
|
||||
decrypt_iv,
|
||||
auth_tag_mem,
|
||||
size,
|
||||
copy_size),
|
||||
out);
|
||||
}
|
||||
else {
|
||||
TEST_NV_CHECK_GOTO(cpu_decrypt_out_of_order(push.channel,
|
||||
dst_plain,
|
||||
dst_cipher,
|
||||
decrypt_iv,
|
||||
auth_tag_mem,
|
||||
size,
|
||||
copy_size),
|
||||
out);
|
||||
}
|
||||
|
||||
TEST_CHECK_GOTO(mem_match(src_plain, dst_plain, size), out);
|
||||
|
||||
out:
|
||||
uvm_mem_free(auth_tag_mem);
|
||||
uvm_mem_free(dst_plain);
|
||||
uvm_mem_free(dst_plain_gpu);
|
||||
uvm_mem_free(dst_cipher);
|
||||
uvm_mem_free(src_cipher);
|
||||
uvm_mem_free(src_plain);
|
||||
uvm_tracker_deinit(&tracker);
|
||||
uvm_kvfree(decrypt_iv);
|
||||
uvm_kvfree(encrypt_iv);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
|
||||
uvm_channel_type_t decrypt_channel_type,
|
||||
uvm_channel_type_t encrypt_channel_type)
|
||||
{
|
||||
bool cpu_decrypt_in_order = true;
|
||||
bool cpu_encrypt_in_order = true;
|
||||
size_t size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_4K * 2, UVM_PAGE_SIZE_2M};
|
||||
size_t copy_size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_2M};
|
||||
unsigned i;
|
||||
|
||||
struct {
|
||||
bool encrypt_in_order;
|
||||
bool decrypt_in_order;
|
||||
} orders[] = {{true, true}, {true, false}, {false, true}, {false, false}};
|
||||
|
||||
struct {
|
||||
size_t size;
|
||||
NvU32 copy_size;
|
||||
} small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};
|
||||
|
||||
// Only Confidential Computing uses CE encryption/decryption
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
// Use a size, and copy size, that are not a multiple of common page sizes.
|
||||
for (i = 0; i < ARRAY_SIZE(small_sizes); ++i) {
|
||||
// Skip tests that need large pushbuffer on WLC. Secure work launch
|
||||
// needs to do at least one decrypt operation so tests that only need
|
||||
// one operation work ok. Tests using more operations might overflow
|
||||
// UVM_MAX_WLC_PUSH_SIZE.
|
||||
if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (small_sizes[i].size / small_sizes[i].copy_size > 1))
|
||||
continue;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
|
||||
decrypt_channel_type,
|
||||
encrypt_channel_type,
|
||||
small_sizes[i].size,
|
||||
small_sizes[i].copy_size,
|
||||
cpu_decrypt_in_order,
|
||||
cpu_encrypt_in_order));
|
||||
}
|
||||
|
||||
// Use sizes, and copy sizes, that are a multiple of common page sizes.
|
||||
// This is the most typical usage of encrypt/decrypt in the UVM driver.
|
||||
for (i = 0; i < ARRAY_SIZE(orders); ++i) {
|
||||
unsigned j;
|
||||
|
||||
cpu_encrypt_in_order = orders[i].encrypt_in_order;
|
||||
cpu_decrypt_in_order = orders[i].decrypt_in_order;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(size); ++j) {
|
||||
unsigned k;
|
||||
|
||||
for (k = 0; k < ARRAY_SIZE(copy_size); ++k) {
|
||||
if (copy_size[k] > size[j])
|
||||
continue;
|
||||
|
||||
// Skip tests that need large pushbuffer on WLC. Secure work
|
||||
// launch needs to do at least one decrypt operation so tests
|
||||
// that only need one operation work ok. Tests using more
|
||||
// operations might overflow UVM_MAX_WLC_PUSH_SIZE.
|
||||
if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (size[j] / copy_size[k] > 1))
|
||||
continue;
|
||||
|
||||
// There is no difference between in-order and out-of-order
|
||||
// decryption when encrypting once.
|
||||
if ((copy_size[k] == size[j]) && !cpu_decrypt_in_order)
|
||||
continue;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
|
||||
decrypt_channel_type,
|
||||
encrypt_channel_type,
|
||||
size[j],
|
||||
copy_size[k],
|
||||
cpu_decrypt_in_order,
|
||||
cpu_encrypt_in_order));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@@ -655,9 +1225,13 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
|
||||
|
||||
if (!skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_CPU_TO_GPU, UVM_CHANNEL_TYPE_GPU_TO_CPU));
|
||||
TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_WLC, UVM_CHANNEL_TYPE_WLC));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -50,6 +50,9 @@
|
||||
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
|
||||
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
|
||||
|
||||
// Maximum number of channels per pool.
|
||||
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL UVM_PUSH_MAX_CONCURRENT_PUSHES
|
||||
|
||||
// Semaphore payloads cannot advance too much between calls to
|
||||
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
|
||||
// are bound by gpfifo sizing as we have to update the completed value to
|
||||
@@ -61,6 +64,14 @@
|
||||
// uvm_channel.h includes uvm_gpu_semaphore.h.
|
||||
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
|
||||
|
||||
#define uvm_channel_pool_assert_locked(pool) ( \
|
||||
{ \
|
||||
if (uvm_channel_pool_uses_mutex(pool)) \
|
||||
uvm_assert_mutex_locked(&(pool)->mutex); \
|
||||
else \
|
||||
uvm_assert_spinlock_locked(&(pool)->spinlock); \
|
||||
})
|
||||
|
||||
// Channel types
|
||||
typedef enum
|
||||
{
|
||||
@@ -83,7 +94,27 @@ typedef enum
|
||||
|
||||
// ^^^^^^
|
||||
// Channel types backed by a CE.
|
||||
UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
|
||||
// ----------------------------------
|
||||
// Channel types not backed by a CE.
|
||||
// vvvvvv
|
||||
|
||||
// SEC2 channels
|
||||
UVM_CHANNEL_TYPE_SEC2 = UVM_CHANNEL_TYPE_CE_COUNT,
|
||||
|
||||
// ----------------------------------
|
||||
// Channel type with fixed schedules
|
||||
|
||||
// Work Launch Channel (WLC) is a specialized channel for launching work on
|
||||
// other channels when the Confidential Computing is feature enabled. It is
|
||||
// paired with LCIC (below)
|
||||
UVM_CHANNEL_TYPE_WLC,
|
||||
|
||||
// Launch Confirmation Indicator Channel (LCIC) is a specialized channel
|
||||
// with fixed schedule. It gets triggered by executing WLC work, and makes
|
||||
// sure that WLC get/put pointers are up-to-date.
|
||||
UVM_CHANNEL_TYPE_LCIC,
|
||||
|
||||
UVM_CHANNEL_TYPE_COUNT,
|
||||
} uvm_channel_type_t;
|
||||
|
||||
typedef enum
|
||||
@@ -101,7 +132,15 @@ typedef enum
|
||||
// There is a single proxy pool and channel per GPU.
|
||||
UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),
|
||||
|
||||
UVM_CHANNEL_POOL_TYPE_COUNT = 2,
|
||||
// A pool of SEC2 channels owned by UVM. These channels are backed by a SEC2
|
||||
// engine.
|
||||
UVM_CHANNEL_POOL_TYPE_SEC2 = (1 << 2),
|
||||
|
||||
UVM_CHANNEL_POOL_TYPE_WLC = (1 << 3),
|
||||
|
||||
UVM_CHANNEL_POOL_TYPE_LCIC = (1 << 4),
|
||||
|
||||
UVM_CHANNEL_POOL_TYPE_COUNT = 5,
|
||||
|
||||
// A mask used to select pools of any type.
|
||||
UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
|
||||
@@ -125,16 +164,24 @@ struct uvm_gpfifo_entry_struct
|
||||
// this entry.
|
||||
NvU64 tracking_semaphore_value;
|
||||
|
||||
union {
|
||||
struct {
|
||||
// Offset of the pushbuffer in the pushbuffer allocation used by
|
||||
// this entry.
|
||||
NvU32 pushbuffer_offset;
|
||||
|
||||
// Size of the pushbuffer used for this entry.
|
||||
NvU32 pushbuffer_size;
|
||||
};
|
||||
|
||||
// Value of control entry
|
||||
// Exact value of GPFIFO entry copied directly to GPFIFO[PUT] location.
|
||||
NvU64 control_value;
|
||||
};
|
||||
|
||||
// The following fields are only valid when type is
|
||||
// UVM_GPFIFO_ENTRY_TYPE_NORMAL.
|
||||
|
||||
// Offset of the pushbuffer in the pushbuffer allocation used by
|
||||
// this entry.
|
||||
NvU32 pushbuffer_offset;
|
||||
|
||||
// Size of the pushbuffer used for this entry.
|
||||
NvU32 pushbuffer_size;
|
||||
|
||||
// List node used by the pushbuffer tracking
|
||||
struct list_head pending_list_node;
|
||||
|
||||
@@ -149,6 +196,19 @@ typedef struct
|
||||
// Owning channel manager
|
||||
uvm_channel_manager_t *manager;
|
||||
|
||||
// On Volta+ GPUs, all channels in a pool are members of the same TSG, i.e.,
|
||||
// num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
|
||||
// does not support TSG for CE engines, a HW TSG is not created, but a TSG
|
||||
// object is required to allocate channels.
|
||||
// When Confidential Computing mode is enabled, the WLC and LCIC channel
|
||||
// types require one TSG for each WLC/LCIC pair of channels. In this case,
|
||||
// we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
|
||||
// channel pair, num_tsgs equals to the number of channel pairs.
|
||||
uvmGpuTsgHandle *tsg_handles;
|
||||
|
||||
// Number TSG handles owned by this pool.
|
||||
NvU32 num_tsgs;
|
||||
|
||||
// Channels in this pool
|
||||
uvm_channel_t *channels;
|
||||
|
||||
@@ -162,8 +222,27 @@ typedef struct
|
||||
// Pool type: Refer to the uvm_channel_pool_type_t enum.
|
||||
uvm_channel_pool_type_t pool_type;
|
||||
|
||||
// Lock protecting the state of channels in the pool
|
||||
uvm_spinlock_t lock;
|
||||
// Lock protecting the state of channels in the pool.
|
||||
//
|
||||
// There are two pool lock types available: spinlock and mutex. The mutex
|
||||
// variant is required when the thread holding the pool lock must sleep
|
||||
// (ex: acquire another mutex) deeper in the call stack, either in UVM or
|
||||
// RM.
|
||||
union {
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
// Secure operations require that uvm_push_begin order matches
|
||||
// uvm_push_end order, because the engine's state is used in its internal
|
||||
// operation and each push may modify this state. push_locks is protected by
|
||||
// the channel pool lock.
|
||||
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
// Counting semaphore for available and unlocked channels, it must be
|
||||
// acquired before submitting work to a channel when the Confidential
|
||||
// Computing feature is enabled.
|
||||
uvm_semaphore_t push_sem;
|
||||
} uvm_channel_pool_t;
|
||||
|
||||
struct uvm_channel_struct
|
||||
@@ -214,6 +293,74 @@ struct uvm_channel_struct
|
||||
// uvm_channel_end_push().
|
||||
uvm_gpu_tracking_semaphore_t tracking_sem;
|
||||
|
||||
struct
|
||||
{
|
||||
// Secure operations require that uvm_push_begin order matches
|
||||
// uvm_push_end order, because the engine's state is used in
|
||||
// its internal operation and each push may modify this state.
|
||||
uvm_mutex_t push_lock;
|
||||
|
||||
// When the Confidential Computing feature is enabled, every channel has
|
||||
// cryptographic state in HW, which is mirrored here for CPU-side
|
||||
// operations.
|
||||
UvmCslContext ctx;
|
||||
bool is_ctx_initialized;
|
||||
|
||||
// CPU-side CSL crypto operations which operate on the same CSL state
|
||||
// are not thread-safe, so they must be wrapped in locks at the UVM
|
||||
// level. Encryption, decryption and logging operations must be
|
||||
// protected with the ctx_lock.
|
||||
uvm_mutex_t ctx_lock;
|
||||
} csl;
|
||||
|
||||
struct
|
||||
{
|
||||
// The value of GPU side PUT index.
|
||||
// Indirect work submission introduces delay between updating the CPU
|
||||
// put when ending a push, and updating the GPU visible value via
|
||||
// indirect work launch. It is used to order multiple pending indirect
|
||||
// work launches to match the order of push end-s that triggered them.
|
||||
volatile NvU32 gpu_put;
|
||||
|
||||
// Static pushbuffer for channels with static schedule (WLC/LCIC)
|
||||
uvm_rm_mem_t *static_pb_protected_vidmem;
|
||||
|
||||
// Static pushbuffer staging buffer for WLC
|
||||
uvm_rm_mem_t *static_pb_unprotected_sysmem;
|
||||
void *static_pb_unprotected_sysmem_cpu;
|
||||
void *static_pb_unprotected_sysmem_auth_tag_cpu;
|
||||
|
||||
// The above static locations are required by the WLC (and LCIC)
|
||||
// schedule. Protected sysmem location completes WLC's independence
|
||||
// from the pushbuffer allocator.
|
||||
void *static_pb_protected_sysmem;
|
||||
|
||||
// Static tracking semaphore notifier values
|
||||
// Because of LCIC's fixed schedule, the secure semaphore release
|
||||
// mechanism uses two additional static locations for incrementing the
|
||||
// notifier values. See:
|
||||
// . channel_semaphore_secure_release()
|
||||
// . setup_lcic_schedule()
|
||||
// . internal_channel_submit_work_wlc()
|
||||
uvm_rm_mem_t *static_notifier_unprotected_sysmem;
|
||||
NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
|
||||
NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
|
||||
uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
|
||||
uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
|
||||
|
||||
// Explicit location for push launch tag used by WLC.
|
||||
// Encryption auth tags have to be located in unprotected sysmem.
|
||||
void *launch_auth_tag_cpu;
|
||||
NvU64 launch_auth_tag_gpu_va;
|
||||
|
||||
// Used to decrypt the push back to protected sysmem.
|
||||
// This happens when profilers register callbacks for migration data.
|
||||
uvm_push_crypto_bundle_t *push_crypto_bundles;
|
||||
|
||||
// Accompanying authentication tags for the crypto bundles
|
||||
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
|
||||
} conf_computing;
|
||||
|
||||
// RM channel information
|
||||
union
|
||||
{
|
||||
@@ -271,7 +418,7 @@ struct uvm_channel_manager_struct
|
||||
unsigned num_channel_pools;
|
||||
|
||||
// Mask containing the indexes of the usable Copy Engines. Each usable CE
|
||||
// has a pool associated with it, see channel_manager_ce_pool
|
||||
// has at least one pool associated with it.
|
||||
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
struct
|
||||
@@ -309,18 +456,71 @@ struct uvm_channel_manager_struct
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
static bool uvm_pool_type_is_valid(uvm_channel_pool_type_t pool_type)
|
||||
{
|
||||
return (is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_sec2(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
|
||||
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_SEC2);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_wlc(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
|
||||
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_WLC);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_lcic(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
|
||||
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_LCIC);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_sec2(uvm_channel_t *channel)
|
||||
{
|
||||
return uvm_channel_pool_is_sec2(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_wlc(uvm_channel_t *channel)
|
||||
{
|
||||
return uvm_channel_pool_is_wlc(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_lcic(uvm_channel_t *channel)
|
||||
{
|
||||
return uvm_channel_pool_is_lcic(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
|
||||
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
return uvm_channel_pool_is_proxy(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
|
||||
{
|
||||
return !uvm_channel_pool_is_sec2(pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
|
||||
return uvm_channel_pool_is_ce(channel->pool);
|
||||
}
|
||||
|
||||
bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool);
|
||||
|
||||
// Proxy channels are used to push page tree related methods, so their channel
|
||||
// type is UVM_CHANNEL_TYPE_MEMOPS.
|
||||
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
|
||||
@@ -375,6 +575,13 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
|
||||
// beginning.
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
|
||||
|
||||
// Check if WLC/LCIC mechanism is ready/setup
|
||||
// Should only return false during initialization
|
||||
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
|
||||
{
|
||||
return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
|
||||
(manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
|
||||
}
|
||||
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
|
||||
// associated with access_channel.
|
||||
//
|
||||
@@ -435,6 +642,10 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
|
||||
|
||||
// Returns the number of available GPFIFO entries. The function internally
|
||||
// acquires the channel pool lock.
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
|
||||
|
||||
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
|
||||
@@ -442,6 +653,11 @@ static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
|
||||
return channel->pool->manager->gpu;
|
||||
}
|
||||
|
||||
static uvm_pushbuffer_t *uvm_channel_get_pushbuffer(uvm_channel_t *channel)
|
||||
{
|
||||
return channel->pool->manager->pushbuffer;
|
||||
}
|
||||
|
||||
// Index of a channel within the owning pool
|
||||
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
|
||||
{
|
||||
|
||||
@@ -60,6 +60,11 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
TEST_CHECK_RET(gpu != NULL);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
@@ -69,7 +74,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;
|
||||
|
||||
// Semaphore release as part of uvm_push_end() will do the membar
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
@@ -104,7 +109,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
value + 1);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
|
||||
gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;
|
||||
gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
|
||||
gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);
|
||||
|
||||
@@ -153,7 +158,6 @@ done:
|
||||
|
||||
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
@@ -168,11 +172,12 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
completed_value = uvm_channel_update_completed_value(channel);
|
||||
uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);
|
||||
|
||||
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
|
||||
TEST_NV_CHECK_RET(uvm_global_get_status());
|
||||
uvm_channel_update_progress_all(channel);
|
||||
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
|
||||
|
||||
uvm_channel_manager_destroy(gpu->channel_manager);
|
||||
|
||||
// Destruction will hit the error again, so clear one more time.
|
||||
uvm_global_reset_fatal_error();
|
||||
|
||||
@@ -200,6 +205,9 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
for (i = 0; i < 512; ++i) {
|
||||
@@ -341,8 +349,8 @@ static void snapshot_counter(uvm_push_t *push,
|
||||
return;
|
||||
|
||||
is_proxy_channel = uvm_channel_is_proxy(push->channel);
|
||||
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
|
||||
snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel) + index * 2 * sizeof(NvU32);
|
||||
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;
|
||||
snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel).address + index * 2 * sizeof(NvU32);
|
||||
|
||||
// Copy the last and first counter to a snapshot for later verification.
|
||||
|
||||
@@ -367,7 +375,7 @@ static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value
|
||||
bool is_proxy_channel;
|
||||
|
||||
is_proxy_channel = uvm_channel_is_proxy(push->channel);
|
||||
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
|
||||
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;
|
||||
|
||||
gpu->parent->ce_hal->memset_v_4(push, counter_gpu_va, value, count * sizeof(NvU32));
|
||||
}
|
||||
@@ -427,7 +435,7 @@ static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 val
|
||||
UVM_ASSERT(rm_mem->size % 4 == 0);
|
||||
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel));
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel)).address;
|
||||
|
||||
gpu->parent->ce_hal->memset_v_4(push, gpu_va, value, rm_mem->size);
|
||||
}
|
||||
@@ -672,6 +680,72 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
|
||||
// This test verifies that concurrent pushes using the same channel pool
|
||||
// select different channels, when the Confidential Computing feature is
|
||||
// enabled.
|
||||
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_push_t *pushes;
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 i;
|
||||
NvU32 num_pushes;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
uvm_thread_context_lock_disable_tracking();
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_type_t channel_type;
|
||||
|
||||
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
|
||||
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
|
||||
TEST_CHECK_RET(pool != NULL);
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
|
||||
if (pool->num_channels < 2)
|
||||
continue;
|
||||
|
||||
num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);
|
||||
|
||||
pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
|
||||
TEST_CHECK_RET(pushes != NULL);
|
||||
|
||||
for (i = 0; i < num_pushes; i++) {
|
||||
uvm_push_t *push = &pushes[i];
|
||||
status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
|
||||
TEST_NV_CHECK_GOTO(status, error);
|
||||
if (i > 0)
|
||||
TEST_CHECK_GOTO(pushes[i-1].channel != push->channel, error);
|
||||
}
|
||||
for (i = 0; i < num_pushes; i++) {
|
||||
uvm_push_t *push = &pushes[i];
|
||||
status = uvm_push_end_and_wait(push);
|
||||
TEST_NV_CHECK_GOTO(status, error);
|
||||
}
|
||||
|
||||
uvm_kvfree(pushes);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
|
||||
return status;
|
||||
error:
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
uvm_kvfree(pushes);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@@ -683,6 +757,14 @@ NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
|
||||
// Skip WLC channels as those can't accept ctrl gpfifos
|
||||
// after their schedule is set up
|
||||
if (uvm_channel_pool_is_wlc(pool))
|
||||
continue;
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
|
||||
@@ -714,6 +796,14 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
|
||||
// Skip WLC channels as those can't accept ctrl gpfifos
|
||||
// after their schedule is set up
|
||||
if (uvm_channel_pool_is_wlc(pool))
|
||||
continue;
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
uvm_push_t push;
|
||||
@@ -743,22 +833,6 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
else
|
||||
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
|
||||
return channel->num_gpfifo_entries - pending_entries - 1;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -771,9 +845,15 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
NvU64 entry;
|
||||
uvm_push_t push;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager = gpu->channel_manager;
|
||||
gpu = manager->gpu;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
|
||||
cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
|
||||
@@ -791,6 +871,12 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
|
||||
uvm_push_end(&push);
|
||||
|
||||
// Flush all completed entries from the GPFIFO ring buffer. This test
|
||||
// requires this flush because we verify (below with
|
||||
// uvm_channel_get_available_gpfifo_entries) the number of free entries
|
||||
// in the channel.
|
||||
uvm_channel_update_progress_all(channel);
|
||||
|
||||
// Populate the remaining GPFIFO entries, leaving 2 slots available.
|
||||
// 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
|
||||
// indicate a terminal condition for the GPFIFO ringbuffer, therefore we
|
||||
@@ -800,7 +886,7 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
uvm_push_end(&push);
|
||||
}
|
||||
|
||||
TEST_CHECK_GOTO(get_available_gpfifo_entries(channel) == 2, error);
|
||||
TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);
|
||||
|
||||
// We should have room for the control GPFIFO and the subsequent
|
||||
// semaphore release.
|
||||
@@ -859,6 +945,9 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
uvm_push_t push;
|
||||
@@ -906,6 +995,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_conf_computing_channel_selection(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// The following tests have side effects, they reset the GPU's
|
||||
// channel_manager.
|
||||
status = test_channel_pushbuffer_extension_base(va_space);
|
||||
@@ -936,7 +1029,7 @@ done:
|
||||
static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (params->iterations == 0 || params->num_streams == 0)
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
@@ -946,13 +1039,16 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(uvm_va_space_find_first_gpu(va_space)))
|
||||
goto done;
|
||||
|
||||
status = stress_test_all_gpus_in_va(va_space,
|
||||
params->num_streams,
|
||||
params->iterations,
|
||||
params->seed,
|
||||
params->verbose);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
|
||||
@@ -34,7 +34,7 @@ static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
|
||||
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
|
||||
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
|
||||
|
||||
bool uvm_debug_prints_enabled()
|
||||
bool uvm_debug_prints_enabled(void)
|
||||
{
|
||||
return uvm_debug_prints != 0;
|
||||
}
|
||||
|
||||
@@ -211,6 +211,11 @@ static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
|
||||
{
|
||||
return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
|
||||
}
|
||||
#define UVM_SIZE_1KB (1024ULL)
|
||||
#define UVM_SIZE_1MB (1024 * UVM_SIZE_1KB)
|
||||
#define UVM_SIZE_1GB (1024 * UVM_SIZE_1MB)
|
||||
#define UVM_SIZE_1TB (1024 * UVM_SIZE_1GB)
|
||||
#define UVM_SIZE_1PB (1024 * UVM_SIZE_1TB)
|
||||
|
||||
#define UVM_ALIGN_DOWN(x, a) ({ \
|
||||
typeof(x) _a = a; \
|
||||
@@ -347,6 +352,22 @@ typedef struct
|
||||
NvHandle user_object;
|
||||
} uvm_rm_user_object_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING,
|
||||
UVM_FD_VA_SPACE,
|
||||
UVM_FD_MM,
|
||||
UVM_FD_COUNT
|
||||
} uvm_fd_type_t;
|
||||
|
||||
// This should be large enough to fit the valid values from uvm_fd_type_t above.
|
||||
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
|
||||
// coverage tool fails due when the preprocessor expands that to a huge mess of
|
||||
// ternary operators.
|
||||
#define UVM_FD_TYPE_BITS 2
|
||||
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
|
||||
|
||||
// Macro used to compare two values for types that support less than operator.
|
||||
// It returns -1 if a < b, 1 if a > b and 0 if a == 0
|
||||
#define UVM_CMP_DEFAULT(a,b) \
|
||||
@@ -369,6 +390,14 @@ typedef struct
|
||||
// file. A NULL input returns false.
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp);
|
||||
|
||||
// Returns the type of data filp->private_data contains to and if ptr_val !=
|
||||
// NULL returns the value of the pointer.
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);
|
||||
|
||||
// Returns the pointer stored in filp->private_data if the type
|
||||
// matches, otherwise returns NULL.
|
||||
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);
|
||||
|
||||
// Reads the first word in the supplied struct page.
|
||||
static inline void uvm_touch_page(struct page *page)
|
||||
{
|
||||
@@ -381,4 +410,7 @@ static inline void uvm_touch_page(struct page *page)
|
||||
kunmap(page);
|
||||
}
|
||||
|
||||
// Return true if the VMA is one used by UVM managed allocations.
|
||||
bool uvm_vma_is_managed(struct vm_area_struct *vma);
|
||||
|
||||
#endif /* _UVM_COMMON_H */
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user