mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-27 11:39:46 +00:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
84715148b4 | ||
|
|
fc4452f892 | ||
|
|
986b3fd1e9 | ||
|
|
db2866126e | ||
|
|
988184c0e5 | ||
|
|
621ebd6965 | ||
|
|
e9fbb541d1 | ||
|
|
a553e6b6a2 | ||
|
|
1a970827d6 | ||
|
|
65bd98c238 | ||
|
|
60c0a71321 | ||
|
|
9594cc0169 | ||
|
|
5f40a5aee5 | ||
|
|
758b4ee818 | ||
|
|
7c345b838b | ||
|
|
90eb10774f |
61
CHANGELOG.md
61
CHANGELOG.md
@@ -1,5 +1,64 @@
|
||||
# Changelog
|
||||
|
||||
## Release 525 Entries
|
||||
|
||||
### [525.89.02] 2023-02-08
|
||||
|
||||
### [525.85.12] 2023-01-30
|
||||
|
||||
### [525.85.05] 2023-01-19
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix build problems with Clang 15.0, [#377](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/377) by @ptr1337
|
||||
|
||||
### [525.78.01] 2023-01-05
|
||||
|
||||
### [525.60.13] 2022-12-05
|
||||
|
||||
### [525.60.11] 2022-11-28
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)
|
||||
|
||||
### [525.53] 2022-11-10
|
||||
|
||||
#### Changed
|
||||
|
||||
- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
|
||||
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
|
||||
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
|
||||
- Return EINVAL when [failing to] allocating memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
|
||||
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
|
||||
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.
|
||||
|
||||
## Release 520 Entries
|
||||
|
||||
### [520.61.07] 2022-10-20
|
||||
|
||||
### [520.56.06] 2022-10-12
|
||||
|
||||
#### Added
|
||||
|
||||
- Introduce support for GeForce RTX 4090 GPUs.
|
||||
|
||||
### [520.61.05] 2022-10-10
|
||||
|
||||
#### Added
|
||||
|
||||
- Introduce support for NVIDIA H100 GPUs.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix/Improve Makefile, [#308](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/308/) by @izenynn
|
||||
- Make nvLogBase2 more efficient, [#177](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/177/) by @DMaroo
|
||||
- nv-pci: fixed always true expression, [#195](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/195/) by @ValZapod
|
||||
|
||||
## Release 515 Entries
|
||||
|
||||
### [515.76] 2022-09-20
|
||||
@@ -9,6 +68,8 @@
|
||||
- Improved compatibility with new Linux kernel releases
|
||||
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
|
||||
|
||||
### [515.65.07] 2022-10-19
|
||||
|
||||
### [515.65.01] 2022-08-02
|
||||
|
||||
#### Fixed
|
||||
|
||||
26
Makefile
26
Makefile
@@ -6,9 +6,9 @@
|
||||
# To install the build kernel modules: run (as root) `make modules_install`
|
||||
###########################################################################
|
||||
|
||||
include utils.mk
|
||||
|
||||
all: modules
|
||||
###########################################################################
|
||||
# variables
|
||||
###########################################################################
|
||||
|
||||
nv_kernel_o = src/nvidia/$(OUTPUTDIR)/nv-kernel.o
|
||||
nv_kernel_o_binary = kernel-open/nvidia/nv-kernel.o_binary
|
||||
@@ -16,13 +16,20 @@ nv_kernel_o_binary = kernel-open/nvidia/nv-kernel.o_binary
|
||||
nv_modeset_kernel_o = src/nvidia-modeset/$(OUTPUTDIR)/nv-modeset-kernel.o
|
||||
nv_modeset_kernel_o_binary = kernel-open/nvidia-modeset/nv-modeset-kernel.o_binary
|
||||
|
||||
.PHONY: $(nv_kernel_o) $(nv_modeset_kernel_o) modules modules_install
|
||||
###########################################################################
|
||||
# rules
|
||||
###########################################################################
|
||||
|
||||
include utils.mk
|
||||
|
||||
.PHONY: all
|
||||
all: modules
|
||||
|
||||
###########################################################################
|
||||
# nv-kernel.o is the OS agnostic portion of nvidia.ko
|
||||
###########################################################################
|
||||
|
||||
.PHONY: $(nv_kernel_o)
|
||||
$(nv_kernel_o):
|
||||
$(MAKE) -C src/nvidia
|
||||
|
||||
@@ -34,6 +41,7 @@ $(nv_kernel_o_binary): $(nv_kernel_o)
|
||||
# nv-modeset-kernel.o is the OS agnostic portion of nvidia-modeset.ko
|
||||
###########################################################################
|
||||
|
||||
.PHONY: $(nv_modeset_kernel_o)
|
||||
$(nv_modeset_kernel_o):
|
||||
$(MAKE) -C src/nvidia-modeset
|
||||
|
||||
@@ -46,31 +54,33 @@ $(nv_modeset_kernel_o_binary): $(nv_modeset_kernel_o)
|
||||
# the kernel modules with kbuild.
|
||||
###########################################################################
|
||||
|
||||
.PHONY: modules
|
||||
modules: $(nv_kernel_o_binary) $(nv_modeset_kernel_o_binary)
|
||||
$(MAKE) -C kernel-open modules
|
||||
|
||||
|
||||
###########################################################################
|
||||
# Install the built kernel modules using kbuild.
|
||||
###########################################################################
|
||||
|
||||
.PHONY: modules_install
|
||||
modules_install:
|
||||
$(MAKE) -C kernel-open modules_install
|
||||
|
||||
|
||||
###########################################################################
|
||||
# clean
|
||||
###########################################################################
|
||||
|
||||
.PHONY: clean nvidia.clean nvidia-modeset.clean kernel-open.clean
|
||||
|
||||
.PHONY: clean
|
||||
clean: nvidia.clean nvidia-modeset.clean kernel-open.clean
|
||||
|
||||
.PHONY: nvidia.clean
|
||||
nvidia.clean:
|
||||
$(MAKE) -C src/nvidia clean
|
||||
|
||||
.PHONY: nvidia-modeset.clean
|
||||
nvidia-modeset.clean:
|
||||
$(MAKE) -C src/nvidia-modeset clean
|
||||
|
||||
.PHONY: kernel-open.clean
|
||||
kernel-open.clean:
|
||||
$(MAKE) -C kernel-open clean
|
||||
|
||||
75
README.md
75
README.md
@@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 515.76.
|
||||
version 525.47.26.
|
||||
|
||||
|
||||
## How to Build
|
||||
@@ -15,9 +15,9 @@ as root:
|
||||
|
||||
make modules_install -j$(nproc)
|
||||
|
||||
Note that the kernel modules built here must be used with gsp.bin
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
515.76 driver release. This can be achieved by installing
|
||||
525.47.26 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@@ -167,7 +167,7 @@ for the target kernel.
|
||||
## Compatible GPUs
|
||||
|
||||
The open-gpu-kernel-modules can be used on any Turing or later GPU
|
||||
(see the table below). However, in the 515.76 release,
|
||||
(see the table below). However, in the 525.47.26 release,
|
||||
GeForce and Workstation support is still considered alpha-quality.
|
||||
|
||||
To enable use of the open kernel modules on GeForce and Workstation GPUs,
|
||||
@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
|
||||
parameter to 1. For more details, see the NVIDIA GPU driver end user
|
||||
README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/515.76/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/525.47.26/README/kernel_open.html
|
||||
|
||||
In the below table, if three IDs are listed, the first is the PCI Device
|
||||
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
|
||||
@@ -645,13 +645,23 @@ Subsystem Device ID.
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
|
||||
| NVIDIA PG506-242 | 20B3 10DE 14A7 |
|
||||
| NVIDIA PG506-243 | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
|
||||
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
|
||||
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
@@ -685,6 +695,7 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 3090 Ti | 2203 |
|
||||
| NVIDIA GeForce RTX 3090 | 2204 |
|
||||
| NVIDIA GeForce RTX 3080 | 2206 |
|
||||
| NVIDIA GeForce RTX 3070 Ti | 2207 |
|
||||
| NVIDIA GeForce RTX 3080 Ti | 2208 |
|
||||
| NVIDIA GeForce RTX 3080 | 220A |
|
||||
| NVIDIA CMP 90HX | 220D |
|
||||
@@ -709,6 +720,13 @@ Subsystem Device ID.
|
||||
| NVIDIA A10 | 2236 10DE 1482 |
|
||||
| NVIDIA A10G | 2237 10DE 152F |
|
||||
| NVIDIA A10M | 2238 10DE 1677 |
|
||||
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
|
||||
| NVIDIA H800 | 2324 10DE 17A6 |
|
||||
| NVIDIA H800 | 2324 10DE 17A8 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
@@ -736,6 +754,8 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
|
||||
| NVIDIA RTX A4500 Laptop GPU | 24BA |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
|
||||
| NVIDIA GeForce RTX 3060 | 24C7 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
|
||||
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
|
||||
@@ -751,12 +771,14 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A2000 | 2531 103C 151D |
|
||||
| NVIDIA RTX A2000 | 2531 10DE 151D |
|
||||
| NVIDIA RTX A2000 | 2531 17AA 151D |
|
||||
| NVIDIA GeForce RTX 3060 | 2544 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
|
||||
| NVIDIA GeForce RTX 3050 | 2582 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
|
||||
@@ -769,14 +791,53 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 2050 | 25A7 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A9 |
|
||||
| NVIDIA GeForce MX570 A | 25AA |
|
||||
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
| NVIDIA A2 | 25B6 10DE 157E |
|
||||
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
|
||||
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
|
||||
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BB |
|
||||
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BD |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
|
||||
| NVIDIA GeForce RTX 2050 | 25ED |
|
||||
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
|
||||
| NVIDIA RTX A2000 Embedded GPU | 25FA |
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
|
||||
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
|
||||
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.76\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"525.47.26\"
|
||||
|
||||
EXTRA_CFLAGS += -Wno-unused-function
|
||||
|
||||
@@ -229,6 +229,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_ioctl.h \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
@@ -243,6 +244,8 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/log2.h \
|
||||
linux/of.h \
|
||||
linux/bug.h \
|
||||
linux/sched.h \
|
||||
linux/sched/mm.h \
|
||||
linux/sched/signal.h \
|
||||
linux/sched/task.h \
|
||||
linux/sched/task_stack.h \
|
||||
@@ -286,7 +289,10 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/ioasid.h \
|
||||
linux/stdarg.h \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h
|
||||
|
||||
# Filename to store the define for the header in $(1); this is only consumed by
|
||||
# the rule below that concatenates all of these together.
|
||||
|
||||
@@ -101,13 +101,6 @@
|
||||
# define NV_ANDROID
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(DceCore) && !defined(NV_DCECORE)
|
||||
# define NV_DCECORE
|
||||
#endif
|
||||
@@ -249,7 +242,7 @@
|
||||
#endif
|
||||
|
||||
/* For verification-only features not intended to be included in normal drivers */
|
||||
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
|
||||
#if defined(ENABLE_VERIF_FEATURES)
|
||||
#define NV_VERIF_FEATURES
|
||||
#endif
|
||||
|
||||
@@ -283,12 +276,6 @@
|
||||
#define NV_IS_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_GSP_MODS)
|
||||
#define NV_IS_GSP_MODS 1
|
||||
#else
|
||||
#define NV_IS_GSP_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_WINDOWS)
|
||||
#define NVOS_IS_WINDOWS 1
|
||||
#else
|
||||
@@ -355,15 +342,6 @@
|
||||
#define NVOS_IS_INTEGRITY 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(NVCPU_X86)
|
||||
#define NVCPU_IS_X86 1
|
||||
#else
|
||||
|
||||
132
kernel-open/common/inc/nv-firmware.h
Normal file
132
kernel-open/common/inc/nv-firmware.h
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NV_FIRMWARE_H
|
||||
#define NV_FIRMWARE_H
|
||||
|
||||
|
||||
|
||||
#include <nvtypes.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_TYPE_GSP,
|
||||
NV_FIRMWARE_TYPE_GSP_LOG
|
||||
} nv_firmware_type_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_CHIP_FAMILY_NULL = 0,
|
||||
NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
|
||||
NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
|
||||
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
|
||||
NV_FIRMWARE_CHIP_FAMILY_END,
|
||||
} nv_firmware_chip_family_t;
|
||||
|
||||
static inline const char *nv_firmware_chip_family_to_string(
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
{
|
||||
switch (fw_chip_family) {
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
|
||||
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
|
||||
//
|
||||
// The function nv_firmware_path will then be available.
|
||||
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
static inline const char *nv_firmware_path(
|
||||
nv_firmware_type_t fw_type,
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
{
|
||||
if (fw_type == NV_FIRMWARE_TYPE_GSP)
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ad10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ad10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
|
||||
// which will then be invoked (at the top-level) for each
|
||||
// gsp_*.bin (but not gsp_log_*.bin)
|
||||
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ad10x.bin")
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
|
||||
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
|
||||
#endif // NV_FIRMWARE_DECLARE_GSP_FILENAME
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
|
||||
* @key: the key of the objects to iterate over
|
||||
*/
|
||||
#define nv_hash_for_each_possible(name, obj, member, key) \
|
||||
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
|
||||
#endif // __NV_HASH_H__
|
||||
|
||||
@@ -27,15 +27,13 @@
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
// Enums for supported hypervisor types.
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
|
||||
typedef enum _HYPERVISOR_TYPE
|
||||
{
|
||||
OS_HYPERVISOR_XEN = 0,
|
||||
OS_HYPERVISOR_VMWARE,
|
||||
OS_HYPERVISOR_HYPERV,
|
||||
OS_HYPERVISOR_KVM,
|
||||
OS_HYPERVISOR_PARALLELS,
|
||||
OS_HYPERVISOR_CUSTOM_FORCED,
|
||||
OS_HYPERVISOR_UNKNOWN
|
||||
} HYPERVISOR_TYPE;
|
||||
|
||||
|
||||
@@ -115,11 +115,6 @@ struct nv_kthread_q_item
|
||||
void *function_args;
|
||||
};
|
||||
|
||||
#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
|
||||
#else
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
|
||||
#endif
|
||||
|
||||
#ifndef NUMA_NO_NODE
|
||||
#define NUMA_NO_NODE (-1)
|
||||
@@ -142,18 +137,12 @@ struct nv_kthread_q_item
|
||||
//
|
||||
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
|
||||
//
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node if
|
||||
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
|
||||
// fallback to another node is possible because kernel allocators do not
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node,
|
||||
// but fallback to another node is possible because kernel allocators do not
|
||||
// guarantee affinity. Note that NUMA-affinity applies only to
|
||||
// the kthread stack. This API does not do anything about limiting the CPU
|
||||
// affinity of the kthread. That is left to the caller.
|
||||
//
|
||||
// On kernels, which do not support NUMA-aware kthread stack allocations
|
||||
// (NV_KTHTREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
|
||||
// if the value supplied for 'preferred_node' is anything other than
|
||||
// NV_KTHREAD_NO_NODE.
|
||||
//
|
||||
// Reusing a queue: once a queue is initialized, it must be safely shut down
|
||||
// (see "Stopping the queue(s)", below), before it can be reused. So, for
|
||||
// a simple queue use case, the following will work:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -191,13 +191,6 @@
|
||||
*/
|
||||
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
|
||||
|
||||
#if !defined(NV_KUID_T_PRESENT)
|
||||
static inline uid_t __kuid_val(uid_t uid)
|
||||
{
|
||||
return uid;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_VGA_ARB)
|
||||
#include <linux/vgaarb.h>
|
||||
#endif
|
||||
@@ -234,18 +227,6 @@ static inline uid_t __kuid_val(uid_t uid)
|
||||
#include <asm-generic/pci-dma-compat.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
|
||||
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
|
||||
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
|
||||
#else
|
||||
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
|
||||
#endif
|
||||
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
|
||||
#define NV_EFI_ENABLED() efi_enabled
|
||||
#else
|
||||
#define NV_EFI_ENABLED() 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_CRAY_XT)
|
||||
#include <cray/cray_nvidia.h>
|
||||
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
|
||||
@@ -521,7 +502,7 @@ static inline void *nv_vmalloc(unsigned long size)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_vfree(void *ptr, NvU32 size)
|
||||
static inline void nv_vfree(void *ptr, NvU64 size)
|
||||
{
|
||||
NV_MEMDBG_REMOVE(ptr, size);
|
||||
vfree(ptr);
|
||||
@@ -592,11 +573,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
{
|
||||
if (node_id < 0 || node_id >= MAX_NUMNODES)
|
||||
return NV_FALSE;
|
||||
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
|
||||
return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#else
|
||||
return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_KMALLOC(ptr, size) \
|
||||
@@ -606,6 +583,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KZALLOC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
|
||||
if (ptr) \
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KMALLOC_ATOMIC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
|
||||
@@ -838,10 +822,8 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
})
|
||||
#endif
|
||||
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
|
||||
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
|
||||
#endif
|
||||
|
||||
#define NV_PRINT_AT(nv_debug_level,at) \
|
||||
@@ -1037,6 +1019,32 @@ static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
/* Converts BAR index to Linux specific PCI BAR index */
|
||||
static inline NvU8 nv_bar_index_to_os_bar_index
|
||||
(
|
||||
struct pci_dev *dev,
|
||||
NvU8 nv_bar_index
|
||||
)
|
||||
{
|
||||
NvU8 bar_index = 0;
|
||||
NvU8 i;
|
||||
|
||||
BUG_ON(nv_bar_index >= NV_GPU_NUM_BARS);
|
||||
|
||||
for (i = 0; i < nv_bar_index; i++)
|
||||
{
|
||||
if (NV_PCI_RESOURCE_FLAGS(dev, bar_index) & PCI_BASE_ADDRESS_MEM_TYPE_64)
|
||||
{
|
||||
bar_index += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
bar_index++;
|
||||
}
|
||||
}
|
||||
|
||||
return bar_index;
|
||||
}
|
||||
|
||||
#define NV_PAGE_MASK (NvU64)(long)PAGE_MASK
|
||||
|
||||
@@ -1113,11 +1121,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
#if defined(NVCPU_X86_64)
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
}
|
||||
#endif
|
||||
*stack = sp;
|
||||
return 0;
|
||||
@@ -1126,7 +1137,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (stack != NULL)
|
||||
if (stack != NULL && rm_is_altstack_in_use())
|
||||
{
|
||||
NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
|
||||
}
|
||||
@@ -1161,16 +1172,6 @@ typedef struct nvidia_pte_s {
|
||||
unsigned int page_count;
|
||||
} nvidia_pte_t;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
typedef struct nv_alloc_s {
|
||||
struct nv_alloc_s *next;
|
||||
struct device *dev;
|
||||
@@ -1370,8 +1371,7 @@ typedef struct nv_dma_map_s {
|
||||
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
|
||||
* single-page sg elements on Xen Server.
|
||||
*/
|
||||
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
|
||||
!defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#if !defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
|
||||
((sg_alloc_table_from_pages(&sm->sgt, \
|
||||
&dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \
|
||||
@@ -1413,34 +1413,6 @@ struct os_wait_queue {
|
||||
struct completion q;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* To report error in msi/msix when unhandled count reaches a threshold
|
||||
*/
|
||||
@@ -1464,19 +1436,6 @@ struct nv_dma_device {
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* linux-specific version of old nv_state_t */
|
||||
/* this is a general os-specific state structure. the first element *must* be
|
||||
the general state structure, for the generic unix-based code */
|
||||
@@ -1492,11 +1451,6 @@ typedef struct nv_linux_state_s {
|
||||
/* IBM-NPU info associated with this GPU */
|
||||
nv_ibmnpu_info_t *npu;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* NUMA node information for the platforms where GPU memory is presented
|
||||
* as a NUMA node to the kernel */
|
||||
struct {
|
||||
@@ -1576,23 +1530,6 @@ typedef struct nv_linux_state_s {
|
||||
/* Per-device notifier block for ACPI events */
|
||||
struct notifier_block acpi_nb;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Lock serializing ISRs for different SOC vectors */
|
||||
nv_spinlock_t soc_isr_lock;
|
||||
|
||||
@@ -1714,6 +1651,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
|
||||
|
||||
#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)
|
||||
|
||||
static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
down(&nvlfp->fops_sp_lock[which]);
|
||||
return nvlfp->fops_sp[which];
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
up(&nvlfp->fops_sp_lock[which]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_ATOMIC_READ(data) atomic_read(&(data))
|
||||
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))
|
||||
@@ -1760,12 +1718,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
|
||||
return cache;
|
||||
}
|
||||
|
||||
|
||||
#if defined(CONFIG_PCI_IOV)
|
||||
#define NV_PCI_SRIOV_SUPPORT
|
||||
#endif /* CONFIG_PCI_IOV */
|
||||
|
||||
|
||||
#define NV_PCIE_CFG_MAX_OFFSET 0x1000
|
||||
|
||||
#include "nv-proto.h"
|
||||
@@ -1944,25 +1900,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
|
||||
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
|
||||
#endif
|
||||
|
||||
#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
|
||||
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
|
||||
#else
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
|
||||
#endif
|
||||
|
||||
#define MODULE_BASE_NAME "nvidia"
|
||||
#define MODULE_INSTANCE_NUMBER 0
|
||||
#define MODULE_INSTANCE_STRING ""
|
||||
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
|
||||
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
|
||||
|
||||
static inline void nv_mutex_destroy(struct mutex *lock)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -73,21 +73,4 @@
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
|
||||
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
hlist_for_each_entry(pos, head, member)
|
||||
#else
|
||||
#if !defined(hlist_entry_safe)
|
||||
#define hlist_entry_safe(ptr, type, member) \
|
||||
(ptr) ? hlist_entry(ptr, type, member) : NULL
|
||||
#endif
|
||||
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
|
||||
pos; \
|
||||
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
|
||||
#endif
|
||||
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT
|
||||
|
||||
#endif // __NV_LIST_HELPERS_H__
|
||||
|
||||
@@ -29,6 +29,25 @@
|
||||
typedef int vm_fault_t;
|
||||
#endif
|
||||
|
||||
/* pin_user_pages
|
||||
* Presence of pin_user_pages() also implies the presence of unpin-user_page().
|
||||
* Both were added in the v5.6-rc1
|
||||
*
|
||||
* pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#define NV_UNPIN_USER_PAGE unpin_user_page
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
|
||||
#define NV_UNPIN_USER_PAGE put_page
|
||||
#endif // NV_PIN_USER_PAGES_PRESENT
|
||||
|
||||
/* get_user_pages
|
||||
*
|
||||
* The 8-argument version of get_user_pages was deprecated by commit
|
||||
@@ -47,69 +66,57 @@ typedef int vm_fault_t;
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_TASK_STRUCT)
|
||||
#if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(current, current->mm, start, nr_pages, flags,
|
||||
pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
|
||||
return get_user_pages(start, nr_pages, write, force, pages, vmas);
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
return get_user_pages(current, current->mm, start, nr_pages, write,
|
||||
force, pages, vmas);
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
/* pin_user_pages_remote
|
||||
*
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
|
||||
*
|
||||
* pin_user_pages_remote() removed 'tsk' parameter by commit
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11). *
|
||||
*
|
||||
*/
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(start, nr_pages, flags, pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined (NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* get_user_pages_remote() was added by commit 1e9877902dc7
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
|
||||
*
|
||||
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
|
||||
* functions") deprecated the 8-argument version of get_user_pages for the
|
||||
* non-remote case (calling get_user_pages with current and current->mm).
|
||||
*
|
||||
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
|
||||
* version that uses something other than current and current->mm. Use
|
||||
* NV_GET_USER_PAGES if you are refering to current and current->mm.
|
||||
*
|
||||
* Note that get_user_pages_remote() requires the caller to hold a reference on
|
||||
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
|
||||
* This will always be true when using current and current->mm. If the kernel passes
|
||||
@@ -131,71 +138,55 @@ typedef int vm_fault_t;
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
|
||||
#else
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG)
|
||||
#if defined (NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG)
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#else
|
||||
return get_user_pages_remote(mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
pages, vmas);
|
||||
|
||||
#endif
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
|
||||
pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
|
||||
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* The .virtual_address field was effectively renamed to .address, by these
|
||||
@@ -270,4 +261,22 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
|
||||
{
|
||||
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
|
||||
vm_flags_set(vma, flags);
|
||||
#else
|
||||
vma->vm_flags |= flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
|
||||
{
|
||||
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
|
||||
vm_flags_clear(vma, flags);
|
||||
#else
|
||||
vma->vm_flags &= ~flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // __NV_MM_H__
|
||||
|
||||
@@ -27,17 +27,6 @@
|
||||
#include <linux/pci.h>
|
||||
#include "nv-linux.h"
|
||||
|
||||
#if defined(NV_DEV_IS_PCI_PRESENT)
|
||||
#define nv_dev_is_pci(dev) dev_is_pci(dev)
|
||||
#else
|
||||
/*
|
||||
* Non-PCI devices are only supported on kernels which expose the
|
||||
* dev_is_pci() function. For older kernels, we only support PCI
|
||||
* devices, hence returning true to take all the PCI code paths.
|
||||
*/
|
||||
#define nv_dev_is_pci(dev) (true)
|
||||
#endif
|
||||
|
||||
int nv_pci_register_driver(void);
|
||||
void nv_pci_unregister_driver(void);
|
||||
int nv_pci_count_devices(void);
|
||||
|
||||
@@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
__entry; \
|
||||
})
|
||||
|
||||
/*
|
||||
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
|
||||
* Use the older interface instead unless the newer interface is necessary.
|
||||
*/
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
proc_mkdir_mode(name, mode, parent)
|
||||
#else
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
({ \
|
||||
struct proc_dir_entry *__entry; \
|
||||
__entry = create_proc_entry(name, mode, parent); \
|
||||
__entry; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define NV_CREATE_PROC_DIR(name,parent) \
|
||||
({ \
|
||||
@@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
#define NV_PDE_DATA(inode) PDE_DATA(inode)
|
||||
#endif
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
proc_remove(entry);
|
||||
#else
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
#endif
|
||||
|
||||
void nv_procfs_unregister_all(struct proc_dir_entry *entry,
|
||||
struct proc_dir_entry *delimiter);
|
||||
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
|
||||
static int nv_procfs_open_##name( \
|
||||
struct inode *inode, \
|
||||
|
||||
@@ -27,9 +27,6 @@
|
||||
#include "nv-pci.h"
|
||||
#include "nv-register-module.h"
|
||||
|
||||
|
||||
|
||||
|
||||
extern const char *nv_device_name;
|
||||
extern nvidia_module_t nv_fops;
|
||||
|
||||
@@ -57,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
|
||||
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
|
||||
void nv_free_system_pages (nv_alloc_t *);
|
||||
|
||||
void nv_address_space_init_once (struct address_space *mapping);
|
||||
|
||||
int nv_uvm_init (void);
|
||||
void nv_uvm_exit (void);
|
||||
NV_STATUS nv_uvm_suspend (void);
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <nvstatus.h>
|
||||
#include "nv_stdarg.h"
|
||||
#include <nv-caps.h>
|
||||
#include <nv-firmware.h>
|
||||
#include <nv-ioctl.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
@@ -47,11 +48,6 @@ extern nv_cap_t *nvidia_caps_root;
|
||||
|
||||
extern const NvBool nv_is_rm_firmware_supported_os;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
/* NVIDIA's reserved major character device number (Linux). */
|
||||
@@ -165,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_MAUD,
|
||||
TEGRASOC_WHICH_CLK_AZA_2XBIT,
|
||||
TEGRASOC_WHICH_CLK_AZA_BIT,
|
||||
TEGRA234_CLK_MIPI_CAL,
|
||||
TEGRA234_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_SOR0_DIV,
|
||||
TEGRASOC_WHICH_CLK_DISP_ROOT,
|
||||
TEGRASOC_WHICH_CLK_HUB_ROOT,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISP,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
|
||||
TEGRASOC_WHICH_CLK_PLLA,
|
||||
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
|
||||
} TEGRASOC_WHICH_CLK;
|
||||
|
||||
@@ -286,6 +288,7 @@ typedef struct nv_usermap_access_params_s
|
||||
NvU64 access_size;
|
||||
NvU64 remap_prot_extra;
|
||||
NvBool contig;
|
||||
NvU32 caching;
|
||||
} nv_usermap_access_params_t;
|
||||
|
||||
/*
|
||||
@@ -303,11 +306,12 @@ typedef struct nv_alloc_mapping_context_s {
|
||||
NvU64 remap_prot_extra;
|
||||
NvU32 prot;
|
||||
NvBool valid;
|
||||
NvU32 caching;
|
||||
} nv_alloc_mapping_context_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_SOC_IRQ_DISPLAY_TYPE,
|
||||
NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
|
||||
NV_SOC_IRQ_DPAUX_TYPE,
|
||||
NV_SOC_IRQ_GPIO_TYPE,
|
||||
NV_SOC_IRQ_HDACODEC_TYPE,
|
||||
@@ -331,6 +335,9 @@ typedef struct nv_soc_irq_info_s {
|
||||
#define NV_MAX_DPAUX_NUM_DEVICES 4
|
||||
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2 // From SOC_DEV_MAPPING
|
||||
|
||||
#define NV_IGPU_LEGACY_STALL_IRQ 70
|
||||
#define NV_IGPU_MAX_STALL_IRQS 3
|
||||
#define NV_IGPU_MAX_NONSTALL_IRQS 1
|
||||
/*
|
||||
* per device state
|
||||
*/
|
||||
@@ -367,6 +374,8 @@ typedef struct nv_state_t
|
||||
nv_aperture_t *hdacodec_regs;
|
||||
nv_aperture_t *mipical_regs;
|
||||
nv_aperture_t *fb, ud;
|
||||
nv_aperture_t *simregs;
|
||||
nv_aperture_t *emc_regs;
|
||||
|
||||
NvU32 num_dpaux_instance;
|
||||
NvU32 interrupt_line;
|
||||
@@ -379,6 +388,11 @@ typedef struct nv_state_t
|
||||
NvU32 soc_dcb_size;
|
||||
NvU32 disp_sw_soc_chip_id;
|
||||
|
||||
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
|
||||
NvU32 igpu_nonstall_irq;
|
||||
NvU32 num_stall_irqs;
|
||||
NvU64 dma_mask;
|
||||
|
||||
NvBool primary_vga;
|
||||
|
||||
NvU32 sim_env;
|
||||
@@ -424,9 +438,6 @@ typedef struct nv_state_t
|
||||
/* Variable to force allocation of 32-bit addressable memory */
|
||||
NvBool force_dma32_alloc;
|
||||
|
||||
/* Variable to track if device has entered dynamic power state */
|
||||
NvBool dynamic_power_entered;
|
||||
|
||||
/* PCI power state should be D0 during system suspend */
|
||||
NvBool d0_state_in_suspend;
|
||||
|
||||
@@ -456,6 +467,12 @@ typedef struct nv_state_t
|
||||
|
||||
NvBool printed_openrm_enable_unsupported_gpus_error;
|
||||
|
||||
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
|
||||
NvBool nvpcf_dsm_in_gpu_scope;
|
||||
|
||||
/* Bool to check if the device received a shutdown notification */
|
||||
NvBool is_shutdown;
|
||||
|
||||
} nv_state_t;
|
||||
|
||||
// These define need to be in sync with defines in system.h
|
||||
@@ -464,6 +481,10 @@ typedef struct nv_state_t
|
||||
#define OS_TYPE_SUNOS 0x3
|
||||
#define OS_TYPE_VMWARE 0x4
|
||||
|
||||
#define NVFP_TYPE_NONE 0x0
|
||||
#define NVFP_TYPE_REFCOUNTED 0x1
|
||||
#define NVFP_TYPE_REGISTERED 0x2
|
||||
|
||||
struct nv_file_private_t
|
||||
{
|
||||
NvHandle *handles;
|
||||
@@ -473,6 +494,7 @@ struct nv_file_private_t
|
||||
|
||||
nv_file_private_t *ctl_nvfp;
|
||||
void *ctl_nvfp_priv;
|
||||
NvU32 register_or_refcount;
|
||||
};
|
||||
|
||||
// Forward define the gpu ops structures
|
||||
@@ -504,8 +526,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
|
||||
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
|
||||
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
|
||||
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
|
||||
/*
|
||||
* flags
|
||||
@@ -520,7 +543,7 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
|
||||
#define NV_FLAG_USES_MSIX 0x0040
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
// Unused 0x0200
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
// Unused 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
@@ -557,30 +580,21 @@ typedef enum
|
||||
NV_POWER_STATE_RUNNING
|
||||
} nv_power_state_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_GSP,
|
||||
NV_FIRMWARE_GSP_LOG
|
||||
} nv_firmware_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
|
||||
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
|
||||
#define NV_IS_SOC_DISPLAY_DEVICE(nv) \
|
||||
((nv)->flags & NV_FLAG_SOC_DISPLAY)
|
||||
|
||||
#define NV_IS_SOC_IGPU_DEVICE(nv) \
|
||||
((nv)->flags & NV_FLAG_SOC_IGPU)
|
||||
|
||||
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
|
||||
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
|
||||
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iso_iommu_present)
|
||||
|
||||
/*
|
||||
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
|
||||
* AC/DC event.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT 0x8002
|
||||
|
||||
/*
|
||||
* GPU add/remove events
|
||||
*/
|
||||
@@ -592,8 +606,6 @@ typedef enum
|
||||
* to core NVIDIA driver for ACPI events.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY 1
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED 1
|
||||
|
||||
@@ -604,14 +616,18 @@ typedef enum
|
||||
#define NV_EVAL_ACPI_METHOD_NVIF 0x01
|
||||
#define NV_EVAL_ACPI_METHOD_WMMX 0x02
|
||||
|
||||
#define NV_I2C_CMD_READ 1
|
||||
#define NV_I2C_CMD_WRITE 2
|
||||
#define NV_I2C_CMD_SMBUS_READ 3
|
||||
#define NV_I2C_CMD_SMBUS_WRITE 4
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
|
||||
typedef enum {
|
||||
NV_I2C_CMD_READ = 1,
|
||||
NV_I2C_CMD_WRITE,
|
||||
NV_I2C_CMD_SMBUS_READ,
|
||||
NV_I2C_CMD_SMBUS_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_WRITE,
|
||||
NV_I2C_CMD_BLOCK_READ,
|
||||
NV_I2C_CMD_BLOCK_WRITE
|
||||
} nv_i2c_cmd_t;
|
||||
|
||||
// Flags needed by OSAllocPagesNode
|
||||
#define NV_ALLOC_PAGES_NODE_NONE 0x0
|
||||
@@ -627,18 +643,14 @@ typedef enum
|
||||
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((offset >= nv->regs->cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
|
||||
}
|
||||
|
||||
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
|
||||
}
|
||||
|
||||
@@ -646,9 +658,7 @@ static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
|
||||
(offset >= nv->ud.cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
|
||||
}
|
||||
|
||||
@@ -657,9 +667,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
|
||||
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
|
||||
}
|
||||
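These helpers all follow the same pattern: the `(offset + (length - 1)) >= offset` term guards against wrap-around before the range check. A hedged caller-side sketch (the mmap-validation context and error code choice are illustrative, not taken from this diff):

```c
/* Hedged sketch: validate a user-requested mapping before exposing BAR1 (FB). */
static NV_STATUS validate_fb_mmap(nv_state_t *nv, NvU64 offset, NvU64 length)
{
    if (length == 0 || !IS_FB_OFFSET(nv, offset, length))
    {
        /* Outside the framebuffer aperture, or offset + length wrapped. */
        return NV_ERR_INVALID_ARGUMENT;
    }
    return NV_OK;
}
```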
@@ -793,13 +801,13 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
|
||||
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
|
||||
|
||||
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
|
||||
void NV_API_CALL nv_put_firmware(const void *);
|
||||
|
||||
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
|
||||
void NV_API_CALL nv_put_file_private(void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
|
||||
@@ -834,6 +842,7 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
|
||||
int NV_API_CALL nv_cap_drv_init(void);
|
||||
void NV_API_CALL nv_cap_drv_exit(void);
|
||||
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
|
||||
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
@@ -850,11 +859,9 @@ void NV_API_CALL nv_dma_release_dma_buf (void *, nv_dma_buf_t *);
|
||||
|
||||
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
|
||||
|
||||
|
||||
NvBool NV_API_CALL nv_platform_supports_s0ix (void);
|
||||
NvBool NV_API_CALL nv_s2idle_pm_configured (void);
|
||||
|
||||
|
||||
NvBool NV_API_CALL nv_is_chassis_notebook (void);
|
||||
void NV_API_CALL nv_allow_runtime_suspend (nv_state_t *nv);
|
||||
void NV_API_CALL nv_disallow_runtime_suspend (nv_state_t *nv);
|
||||
@@ -864,45 +871,6 @@ typedef void (*nvTegraDceClientIpcCallback)(NvU32, NvU32, NvU32, void *, void *)
|
||||
NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
@@ -947,6 +915,7 @@ NV_STATUS NV_API_CALL rm_write_registry_string (nvidia_stack_t *, nv_state_t *
|
||||
void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *);
|
||||
char* NV_API_CALL rm_remove_spaces (const char *);
|
||||
char* NV_API_CALL rm_string_token (char **, const char);
|
||||
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);
|
||||
@@ -963,11 +932,11 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,
|
||||
|
||||
void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);
|
||||
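The command argument of rm_i2c_transfer is now typed as nv_i2c_cmd_t rather than a raw NvU8. A hedged call-site sketch (sp, nv and adapter are placeholders obtained elsewhere; the positional meaning of the NvU8/NvU32 arguments is an assumption):

```c
/* Hedged sketch: read one byte over SMBus using the typed command enum. */
NvU8 data = 0;
NV_STATUS status = rm_i2c_transfer(sp, nv, adapter,
                                   NV_I2C_CMD_SMBUS_READ,
                                   0x50,   /* 7-bit device address (assumed meaning) */
                                   0x00,   /* register/offset (assumed meaning)      */
                                   1,      /* transfer length in bytes               */
                                   &data);
```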
|
||||
NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
|
||||
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
|
||||
|
||||
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
|
||||
@@ -990,7 +959,7 @@ NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
|
||||
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
|
||||
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);
|
||||
@@ -1016,12 +985,13 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
|
||||
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
@@ -1034,6 +1004,10 @@ NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_acces
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
|
||||
|
||||
#if defined(NV_VMWARE)
|
||||
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
|
||||
#endif
|
||||
|
||||
/* Callbacks should occur roughly every 10ms. */
|
||||
#define NV_SNAPSHOT_TIMER_HZ 100
|
||||
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
|
||||
@@ -1045,17 +1019,15 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
|
||||
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
|
||||
}
|
||||
|
||||
/* nano second resolution timer callback structure */
|
||||
typedef struct nv_nano_timer nv_nano_timer_t;
|
||||
/* nano timer functions */
|
||||
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
|
||||
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
|
||||
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
|
||||
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
|
||||
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -201,7 +201,8 @@ void nvUvmInterfaceAddressSpaceDestroy(uvmGpuAddressSpaceHandle vaSpace);
|
||||
and will return a unique GPU virtual address.
|
||||
|
||||
The default page size will be the small page size (as returned by query
|
||||
caps). The Alignment will also be enforced to small page size(64K/128K).
|
||||
caps). The physical alignment will also be enforced to small page
|
||||
size(64K/128K).
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - Pointer to vaSpace object
|
||||
@@ -211,15 +212,15 @@ void nvUvmInterfaceAddressSpaceDestroy(uvmGpuAddressSpaceHandle vaSpace);
|
||||
contains below given fields
|
||||
|
||||
allocInfo Members:
|
||||
rangeBegin[IN] - Allocation will be made between rangeBegin
|
||||
rangeEnd[IN] and rangeEnd(both inclusive). Default will be
|
||||
no-range limitation.
|
||||
gpuPhysOffset[OUT] - Physical offset of allocation returned only
|
||||
if contiguous allocation is requested.
|
||||
pageSize[IN] - Override the default page size (see above).
|
||||
alignment[IN] - gpuPointer GPU VA alignment. 0 means 4KB
|
||||
alignment.
|
||||
bContiguousPhysAlloc[IN] - Flag to request contiguous allocation. Default
|
||||
will follow the vidHeapControl default policy.
|
||||
bHandleProvided [IN] - Flag to signify that the client has provided
|
||||
the handle for phys allocation.
|
||||
bMemGrowsDown[IN]
|
||||
bPersistentVidmem[IN] - Allocate persistent vidmem.
|
||||
hPhysHandle[IN/OUT] - The handle will be used in allocation if provided.
|
||||
If not provided; allocator will return the handle
|
||||
it used eventually.
|
||||
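As a sketch of how a UVM client might fill these fields (the exact nvUvmInterfaceMemoryAllocFB prototype, the out-pointer type, and the vaSpace handle are assumptions for illustration, not taken from this diff):

```c
/* Hedged sketch: request a 2 MB, physically contiguous vidmem allocation. */
UvmGpuAllocInfo allocInfo = { 0 };
NvU64           gpuVa     = 0;

allocInfo.pageSize             = 2 * 1024 * 1024; /* override the default small page size   */
allocInfo.alignment            = 0;               /* 0 means 4KB GPU VA alignment            */
allocInfo.bContiguousPhysAlloc = NV_TRUE;         /* gpuPhysOffset becomes valid on success  */

NV_STATUS status = nvUvmInterfaceMemoryAllocFB(vaSpace, 2 * 1024 * 1024,
                                               &gpuVa, &allocInfo);
```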
@@ -247,7 +248,6 @@ NV_STATUS nvUvmInterfaceMemoryAllocFB(uvmGpuAddressSpaceHandle vaSpace,
|
||||
and will return a unique GPU virtual address.
|
||||
|
||||
The default page size will be the small page size (as returned by query caps)
|
||||
The Alignment will also be enforced to small page size.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - Pointer to vaSpace object
|
||||
@@ -257,15 +257,15 @@ NV_STATUS nvUvmInterfaceMemoryAllocFB(uvmGpuAddressSpaceHandle vaSpace,
|
||||
contains below given fields
|
||||
|
||||
allocInfo Members:
|
||||
rangeBegin[IN] - Allocation will be made between rangeBegin
|
||||
rangeEnd[IN] and rangeEnd(both inclusive). Default will be
|
||||
no-range limitation.
|
||||
gpuPhysOffset[OUT] - Physical offset of allocation returned only
|
||||
if contiguous allocation is requested.
|
||||
pageSize[IN] - Override the default page size (see above).
|
||||
alignment[IN] - gpuPointer GPU VA alignment. 0 means 4KB
|
||||
alignment.
|
||||
bContiguousPhysAlloc[IN] - Flag to request contiguous allocation. Default
|
||||
will follow the vidHeapControl default policy.
|
||||
bHandleProvided [IN] - Flag to signify that the client has provided
|
||||
the handle for phys allocation.
|
||||
bMemGrowsDown[IN]
|
||||
bPersistentVidmem[IN] - Allocate persistent vidmem.
|
||||
hPhysHandle[IN/OUT] - The handle will be used in allocation if provided.
|
||||
If not provided; allocator will return the handle
|
||||
it used eventually.
|
||||
@@ -331,10 +331,14 @@ typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
|
||||
NvU64 *pPages,
|
||||
NvU32 count,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd);
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
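For illustration only, a stub eviction-range callback with the new signature might look like this (the body and the protected-memory branch are placeholders, not UVM's real eviction policy):

```c
/* Hedged sketch of a uvmPmaEvictRangeCallback implementation with the new
 * mem_type parameter. */
static NV_STATUS myEvictRangeCallback(void *callbackData,
                                      NvU64 physBegin,
                                      NvU64 physEnd,
                                      UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
    if (mem_type == UVM_PMA_GPU_MEMORY_TYPE_PROTECTED)
    {
        /* Protected memory may need a different eviction path. */
    }

    /* Evict everything in [physBegin, physEnd] ... */
    return NV_OK;
}
```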
/*******************************************************************************
|
||||
nvUvmInterfacePmaRegisterEvictionCallbacks
|
||||
@@ -671,14 +675,16 @@ NV_STATUS nvUvmInterfaceUnsetPageDirectory(uvmGpuAddressSpaceHandle vaSpace);
|
||||
For duplication of physical memory use nvUvmInterfaceDupMemory.
|
||||
|
||||
Arguments:
|
||||
srcVaSpace[IN] - Source VA space.
|
||||
srcAddress[IN] - GPU VA in the source VA space. The provided address
|
||||
should match one previously returned by
|
||||
nvUvmInterfaceMemoryAllocFB or
|
||||
nvUvmInterfaceMemoryAllocSys.
|
||||
dstVaSpace[IN] - Destination VA space where the new mapping will be
|
||||
created.
|
||||
dstAddress[OUT] - Pointer to the GPU VA in the destination VA space.
|
||||
srcVaSpace[IN] - Source VA space.
|
||||
srcAddress[IN] - GPU VA in the source VA space. The provided address
|
||||
should match one previously returned by
|
||||
nvUvmInterfaceMemoryAllocFB or
|
||||
nvUvmInterfaceMemoryAllocSys.
|
||||
dstVaSpace[IN] - Destination VA space where the new mapping will be
|
||||
created.
|
||||
dstVaAlignment[IN] - Alignment of the GPU VA in the destination VA
|
||||
space. 0 means 4KB alignment.
|
||||
dstAddress[OUT] - Pointer to the GPU VA in the destination VA space.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - If any of the inputs is invalid, or the source
|
||||
@@ -692,6 +698,7 @@ NV_STATUS nvUvmInterfaceUnsetPageDirectory(uvmGpuAddressSpaceHandle vaSpace);
|
||||
NV_STATUS nvUvmInterfaceDupAllocation(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
NvU64 srcAddress,
|
||||
uvmGpuAddressSpaceHandle dstVaSpace,
|
||||
NvU64 dstVaAlignment,
|
||||
NvU64 *dstAddress);
|
||||
|
||||
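A minimal usage sketch of the updated prototype (the VA space handles and source address are placeholders obtained from earlier calls):

```c
/* Hedged sketch: duplicate an allocation into another VA space, requesting
 * the default 4KB alignment for the destination GPU VA. */
NvU64 dstAddress = 0;
NV_STATUS status = nvUvmInterfaceDupAllocation(srcVaSpace,
                                               srcAddress,  /* from MemoryAllocFB/Sys */
                                               dstVaSpace,
                                               0,           /* dstVaAlignment: 0 = 4KB */
                                               &dstAddress);
```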
/*******************************************************************************
|
||||
@@ -1068,10 +1075,6 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
NV_ERR_NOT_READY - Returned when querying the PTEs requires a deferred setup
|
||||
which has not yet completed. It is expected that the caller
|
||||
will reattempt the call until a different code is returned.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvHandle hMemory,
|
||||
@@ -1260,7 +1263,7 @@ void nvUvmInterfacePagingChannelDestroy(UvmGpuPagingChannelHandle channel);
|
||||
device[IN] - device under which paging channels were allocated
|
||||
dstAddress[OUT] - a virtual address that is valid (i.e. is mapped) in
|
||||
all the paging channels allocated under the given vaSpace.
|
||||
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
|
||||
NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled.
|
||||
@@ -1361,8 +1364,6 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
a. pre-allocated stack
|
||||
b. the fact that internal RPC infrastructure doesn't acquire GPU lock.
|
||||
Therefore, locking is the caller's responsibility.
|
||||
- This function DOES NOT sleep (does not allocate memory or acquire locks)
|
||||
so it can be invoked while holding a spinlock.
|
||||
|
||||
Arguments:
|
||||
channel[IN] - paging channel handle obtained via
|
||||
@@ -1373,7 +1374,7 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
methodStreamSize[IN] - Size of methodStream, in bytes. The maximum push
|
||||
size is 128KB.
|
||||
|
||||
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
|
||||
NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled.
|
||||
@@ -1382,136 +1383,4 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
#endif // _NV_UVM_INTERFACE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -217,12 +217,6 @@ typedef struct UvmGpuChannelInstanceInfo_tag
|
||||
// Out: Type of the engine the channel is bound to
|
||||
NvU32 channelEngineType;
|
||||
|
||||
// Out: Channel handle required to ring the doorbell
|
||||
NvU32 workSubmissionToken;
|
||||
|
||||
// Out: Address of the doorbell
|
||||
volatile NvU32 *workSubmissionOffset;
|
||||
|
||||
// Out: Channel handle to be used in the CLEAR_FAULTED method
|
||||
NvU32 clearFaultedToken;
|
||||
|
||||
@@ -231,6 +225,10 @@ typedef struct UvmGpuChannelInstanceInfo_tag
|
||||
// Ampere+ GPUs
|
||||
volatile NvU32 *pChramChannelRegister;
|
||||
|
||||
// Out: Address of the Runlist PRI Base Register required to ring the
|
||||
// doorbell after clearing the faulted bit.
|
||||
volatile NvU32 *pRunlistPRIBaseRegister;
|
||||
|
||||
// Out: SMC engine id to which the GR channel is bound, or zero if the GPU
|
||||
// does not support SMC or it is a CE channel
|
||||
NvU32 smcEngineId;
|
||||
@@ -372,10 +370,8 @@ typedef enum
|
||||
UVM_LINK_TYPE_NVLINK_1,
|
||||
UVM_LINK_TYPE_NVLINK_2,
|
||||
UVM_LINK_TYPE_NVLINK_3,
|
||||
UVM_LINK_TYPE_NVLINK_4,
|
||||
UVM_LINK_TYPE_C2C,
|
||||
} UVM_LINK_TYPE;
|
||||
|
||||
typedef struct UvmGpuCaps_tag
|
||||
@@ -433,19 +429,13 @@ typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
|
||||
typedef struct UvmGpuAllocInfo_tag
|
||||
{
|
||||
NvU64 rangeBegin; // Allocation will be made between
|
||||
NvU64 rangeEnd; // rangeBegin & rangeEnd both included
|
||||
NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested
|
||||
NvU32 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 alignment; // Alignment of allocation
|
||||
NvU64 alignment; // Virtual alignment
|
||||
NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation
|
||||
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
|
||||
NvBool bPersistentVidmem; // Causes RM to allocate persistent video memory
|
||||
NvHandle hPhysHandle; // Handle for phys allocation either provided or retrieved
|
||||
} UvmGpuAllocInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -576,10 +566,8 @@ typedef struct UvmPlatformInfo_tag
|
||||
// Out: ATS (Address Translation Services) is supported
|
||||
NvBool atsSupported;
|
||||
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
|
||||
NvBool sevEnabled;
|
||||
} UvmPlatformInfo;
|
||||
|
||||
typedef struct UvmGpuClientInfo_tag
|
||||
@@ -589,24 +577,6 @@ typedef struct UvmGpuClientInfo_tag
|
||||
NvHandle hSmcPartRef;
|
||||
} UvmGpuClientInfo;
|
||||
#define UVM_GPU_NAME_LENGTH 0x40
|
||||
|
||||
typedef struct UvmGpuInfo_tag
|
||||
@@ -671,10 +641,6 @@ typedef struct UvmGpuInfo_tag
|
||||
|
||||
UvmGpuClientInfo smcUserClientInfo;
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@@ -717,11 +683,6 @@ typedef struct UvmPmaStatistics_tag
|
||||
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
|
||||
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
|
||||
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
|
||||
} UvmPmaStatistics;
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -869,6 +830,8 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
NvHandle faultBufferHandle;
|
||||
} UvmGpuFaultInfo;
|
||||
|
||||
struct Device;
|
||||
|
||||
typedef struct UvmGpuPagingChannel_tag
|
||||
{
|
||||
struct gpuDevice *device;
|
||||
@@ -876,6 +839,7 @@ typedef struct UvmGpuPagingChannel_tag
|
||||
NvHandle channelHandle;
|
||||
NvHandle errorNotifierHandle;
|
||||
void *pushStreamSp;
|
||||
struct Device *pDevice;
|
||||
} UvmGpuPagingChannel, *UvmGpuPagingChannelHandle;
|
||||
|
||||
typedef struct UvmGpuAccessCntrInfo_tag
|
||||
@@ -936,6 +900,16 @@ typedef struct UvmGpuAccessCntrConfig_tag
|
||||
NvU32 threshold;
|
||||
} UvmGpuAccessCntrConfig;
|
||||
|
||||
//
|
||||
// When modifying this enum, make sure they are compatible with the mirrored
|
||||
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
|
||||
//
|
||||
typedef enum UvmPmaGpuMemoryType_tag
|
||||
{
|
||||
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
|
||||
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
|
||||
} UVM_PMA_GPU_MEMORY_TYPE;
|
||||
|
||||
typedef UvmGpuChannelInfo gpuChannelInfo;
|
||||
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
|
||||
typedef UvmGpuCaps gpuCaps;
|
||||
@@ -961,10 +935,4 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
|
||||
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
|
||||
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
|
||||
#endif // _NV_UVM_TYPES_H_
|
||||
|
||||
@@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|
||||
|* *|
|
||||
\***************************************************************************/
|
||||
|
||||
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
|
||||
typedef NvU64 NvOffset; /* GPU address */
|
||||
#endif
|
||||
|
||||
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
|
||||
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))
|
||||
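A one-line example of splitting a 64-bit GPU address with these macros (the address value is arbitrary):

```c
/* Hedged sketch: split a 64-bit GPU offset into the 32-bit halves that many
 * control calls expect. */
NvOffset gpuAddr = 0x0000000123456000ULL;
NvU32 hi = NvOffset_HI32(gpuAddr);   /* 0x00000001 */
NvU32 lo = NvOffset_LO32(gpuAddr);   /* 0x23456000 */
```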
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <nvlimits.h>
|
||||
|
||||
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
|
||||
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
|
||||
|
||||
#define NVKMS_LEFT 0
|
||||
#define NVKMS_RIGHT 1
|
||||
@@ -530,4 +531,78 @@ typedef struct {
|
||||
NvBool noncoherent;
|
||||
} NvKmsDispIOCoherencyModes;
|
||||
|
||||
enum NvKmsInputColorSpace {
|
||||
/* Unknown colorspace; no de-gamma will be applied */
|
||||
NVKMS_INPUT_COLORSPACE_NONE = 0,
|
||||
|
||||
/* Linear, Rec.709 [-0.5, 7.5) */
|
||||
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
|
||||
|
||||
/* PQ, Rec.2020 unity */
|
||||
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
|
||||
};
|
||||
|
||||
enum NvKmsOutputTf {
|
||||
/*
|
||||
* NVKMS itself won't apply any OETF (clients are still
|
||||
* free to provide a custom OLUT)
|
||||
*/
|
||||
NVKMS_OUTPUT_TF_NONE = 0,
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
|
||||
NVKMS_OUTPUT_TF_PQ = 2,
|
||||
};
|
||||
|
||||
/*!
|
||||
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
|
||||
* This is expected to match exactly with the spec.
|
||||
*/
|
||||
struct NvKmsHDRStaticMetadata {
|
||||
/*!
|
||||
* Color primaries of the data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} displayPrimaries[3];
|
||||
|
||||
/*!
|
||||
* White point of colorspace data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} whitePoint;
|
||||
|
||||
/**
|
||||
* Maximum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Minimum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of
|
||||
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
|
||||
* represents 6.5535 cd/m2.
|
||||
*/
|
||||
NvU16 minDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Maximum content light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxCLL;
|
||||
|
||||
/*!
|
||||
* Maximum frame-average light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxFALL;
|
||||
};
|
||||
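To make the fixed-point encodings above concrete, here is a small hedged helper (purely illustrative, not part of the header and not kernel code) that converts floating-point values into the coded units the struct expects:

```c
/* Hedged sketch: encode CEA-861.3 style values into NvKmsHDRStaticMetadata units.
 * Chromaticity coordinates are coded in units of 0.00002 (0xC350 == 1.0),
 * max luminance in 1 cd/m2 units, min luminance in 0.0001 cd/m2 units. */
static NvU16 encodeChromaticity(double coord)    /* e.g. 0.3127 for a D65 white x */
{
    return (NvU16)(coord / 0.00002 + 0.5);
}

static NvU16 encodeMinLuminance(double cdPerM2)  /* e.g. 0.005 cd/m2 */
{
    return (NvU16)(cdPerM2 / 0.0001 + 0.5);
}

/* Example values for a 1000-nit mastering display with a D65 white point:
 *   maxDisplayMasteringLuminance = 1000;
 *   whitePoint.x = encodeChromaticity(0.3127);              => 0x3D13
 *   minDisplayMasteringLuminance = encodeMinLuminance(0.005);
 */
```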
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
||||
@@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
|
||||
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
|
||||
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
|
||||
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
|
||||
};
|
||||
|
||||
typedef struct NvKmsSurfaceMemoryFormatInfo {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
} caps;
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@@ -218,6 +219,11 @@ struct NvKmsKapiLayerConfig {
|
||||
struct NvKmsRRParams rrParams;
|
||||
struct NvKmsKapiSyncpt syncptParams;
|
||||
|
||||
struct NvKmsHDRStaticMetadata hdrMetadata;
|
||||
NvBool hdrMetadataSpecified;
|
||||
|
||||
enum NvKmsOutputTf tf;
|
||||
|
||||
NvU8 minPresentInterval;
|
||||
NvBool tearing;
|
||||
|
||||
@@ -226,6 +232,8 @@ struct NvKmsKapiLayerConfig {
|
||||
|
||||
NvS16 dstX, dstY;
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
@@ -277,6 +285,8 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
struct NvKmsKapiDisplayMode mode;
|
||||
|
||||
NvBool vrrEnabled;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
@@ -368,6 +378,9 @@ struct NvKmsKapiDynamicDisplayParams {
|
||||
/* [OUT] Connection status */
|
||||
NvU32 connected;
|
||||
|
||||
/* [OUT] VRR status */
|
||||
NvBool vrrSupported;
|
||||
|
||||
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
|
||||
struct {
|
||||
NvU16 bufferSize;
|
||||
@@ -416,6 +429,12 @@ struct NvKmsKapiCreateSurfaceParams {
|
||||
NvU8 log2GobsPerBlockY;
|
||||
};
|
||||
|
||||
enum NvKmsKapiAllocationType {
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT = 0,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_NOTIFIER = 1,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN = 2,
|
||||
};
|
||||
|
||||
struct NvKmsKapiFunctionsTable {
|
||||
|
||||
/*!
|
||||
@@ -478,6 +497,38 @@ struct NvKmsKapiFunctionsTable {
|
||||
*/
|
||||
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] fd fd from opening /dev/nvidia-modeset.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to grant.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*grantPermissions)
|
||||
(
|
||||
NvS32 fd,
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Revoke modeset permissions previously granted. This currently applies for all
|
||||
* previous grant requests for this device.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*revokePermissions)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Registers for notification, via
|
||||
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified
|
||||
@@ -609,6 +660,8 @@ struct NvKmsKapiFunctionsTable {
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] layout BlockLinear or Pitch.
|
||||
*
|
||||
* \param [in] type Allocation type.
|
||||
*
|
||||
* \param [in] size Size, in bytes, of the memory to allocate.
|
||||
*
|
||||
@@ -624,6 +677,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
enum NvKmsSurfaceMemoryLayout layout,
|
||||
enum NvKmsKapiAllocationType type,
|
||||
NvU64 size,
|
||||
NvU8 *compressible
|
||||
);
|
||||
@@ -637,6 +691,8 @@ struct NvKmsKapiFunctionsTable {
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] layout BlockLinear or Pitch.
|
||||
*
|
||||
* \param [in] type Allocation type.
|
||||
*
|
||||
* \param [in] size Size, in bytes, of the memory to allocate.
|
||||
*
|
||||
@@ -652,6 +708,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
enum NvKmsSurfaceMemoryLayout layout,
|
||||
enum NvKmsKapiAllocationType type,
|
||||
NvU64 size,
|
||||
NvU8 *compressible
|
||||
);
|
||||
|
||||
@@ -31,13 +31,6 @@
|
||||
|
|
||||
/*
|
||||
* This is the maximum number of GPUs supported in a single system.
|
||||
*/
|
||||
|
||||
@@ -234,12 +234,14 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
|
||||
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
|
||||
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
|
||||
#else
|
||||
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
|
||||
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
|
||||
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
|
||||
#endif
|
||||
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
@@ -249,12 +251,12 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (1?drf) // much better
|
||||
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
|
||||
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
|
||||
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#endif
|
||||
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
|
||||
#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
|
||||
#endif
|
||||
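As a reminder of how these device-register-field macros are typically used (the NV_PMC_BOOT_0 / NV_PFIFO_CTRL field names and the register-read helper below are hypothetical, chosen only to show the idiom):

```c
/* Hedged sketch: the usual DRF idiom for extracting and building bitfields. */
NvU32 boot0  = read_reg32(NV_PMC_BOOT_0);               /* hypothetical register read  */
NvU32 chipId = DRF_VAL(_PMC, _BOOT_0, _CHIP_ID, boot0); /* extract a field             */

NvU32 ctrl = 0;
ctrl |= DRF_DEF(_PFIFO, _CTRL, _ENABLE, _TRUE);         /* insert a named field value  */
ctrl |= DRF_NUM(_PFIFO, _CTRL, _TIMESLICE, 0x20);       /* insert a numeric field value */
ctrl &= ~DRF_SHIFTMASK(NV_PFIFO_CTRL_TIMESLICE);        /* clear that field again       */
```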
@@ -907,6 +909,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
return uAddr.p;
|
||||
}
|
||||
|
||||
// Get bit at pos (k) from x
|
||||
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
|
||||
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
|
||||
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
|
||||
//
|
||||
// Set the bit at pos (b) for U64 which is < 128. Since the (b) can be >= 64, we need 2 U64 to store this.
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
|
||||
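A quick hedged illustration of the 128-bit pair helpers (assumes NVBIT64 and nvAssert are available, as the macros themselves do):

```c
/* Hedged sketch: track a 128-bit mask as two NvU64 words and use the helpers. */
NvU64 lo = 0, hi = 0;

NV_BIT_SET_128(3,  lo, hi);   /* sets bit 3 in lo            */
NV_BIT_SET_128(70, lo, hi);   /* sets bit 6 in hi (70 - 64)  */

/* Both reads return 1; bits below 64 come from lo, the rest from hi. */
int a = NV_BIT_GET_128(3,  lo, hi);
int b = NV_BIT_GET_128(70, lo, hi);
```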
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUS_H
|
||||
#define SDK_NVSTATUS_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUS_H */
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUSCODES_H
|
||||
#define SDK_NVSTATUSCODES_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
|
||||
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
|
||||
|
||||
@@ -153,6 +148,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
@@ -164,6 +160,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
|
||||
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
|
||||
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUSCODES_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -125,6 +125,7 @@ NvU32 NV_API_CALL os_get_cpu_number (void);
|
||||
void NV_API_CALL os_disable_console_access (void);
|
||||
void NV_API_CALL os_enable_console_access (void);
|
||||
NV_STATUS NV_API_CALL os_registry_init (void);
|
||||
NvU64 NV_API_CALL os_get_max_user_va (void);
|
||||
NV_STATUS NV_API_CALL os_schedule (void);
|
||||
NV_STATUS NV_API_CALL os_alloc_spinlock (void **);
|
||||
void NV_API_CALL os_free_spinlock (void *);
|
||||
@@ -142,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_release_semaphore (void *);
|
||||
void* NV_API_CALL os_alloc_rwlock (void);
|
||||
void NV_API_CALL os_free_rwlock (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
|
||||
void NV_API_CALL os_release_rwlock_read (void *);
|
||||
void NV_API_CALL os_release_rwlock_write (void *);
|
||||
NvBool NV_API_CALL os_semaphore_may_sleep (void);
|
||||
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
|
||||
NvBool NV_API_CALL os_is_isr (void);
|
||||
@@ -193,19 +202,12 @@ void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
|
||||
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
|
||||
void NV_API_CALL os_nv_cap_close_fd (int);
|
||||
enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
|
||||
OS_INTF_PCIE_REQ_ATOMICS_64BIT,
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
@@ -245,11 +247,4 @@ int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...);
|
||||
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_NO 0x00000000
|
||||
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_YES 0x00000001
|
||||
#endif /* OS_INTERFACE_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -63,7 +63,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_query_ces_caps(nvidia_stack_t *sp, nvgpuDeviceHandle_t, nvgpuCesCaps_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_gpu_info(nvidia_stack_t *, const NvProcessorUuid *pUuid, const nvgpuClientInfo_t *, nvgpuInfo_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_service_device_interrupts_rm(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_allocation(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuAddressSpaceHandle_t, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_allocation(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuAddressSpaceHandle_t, NvU64, NvU64 *);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_memory (nvidia_stack_t *, nvgpuDeviceHandle_t, NvHandle, NvHandle, NvHandle *, nvgpuMemoryInfo_t);
|
||||
|
||||
@@ -98,13 +98,4 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
|
||||
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
25
kernel-open/count-lines.mk
Normal file
@@ -0,0 +1,25 @@
|
||||
count:
|
||||
@echo "conftests:$(words $(ALL_CONFTESTS))" \
|
||||
"objects:$(words $(NV_OBJECTS_DEPEND_ON_CONFTEST))" \
|
||||
"modules:$(words $(NV_KERNEL_MODULES))"
|
||||
|
||||
.PHONY: count
|
||||
|
||||
# Include the top-level makefile to get $(NV_KERNEL_MODULES)
|
||||
include Makefile
|
||||
|
||||
# Set $(src) for the to-be-included nvidia*.Kbuild files
|
||||
src := $(CURDIR)
|
||||
|
||||
# Include nvidia*.Kbuild and append the nvidia*-y objects to ALL_OBJECTS
|
||||
$(foreach _module, $(NV_KERNEL_MODULES), \
|
||||
$(eval include $(_module)/$(_module).Kbuild) \
|
||||
)
|
||||
|
||||
# Concatenate all of the conftest lists; use $(sort ) to remove duplicates
|
||||
ALL_CONFTESTS := $(sort $(NV_CONFTEST_FUNCTION_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_GENERIC_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_MACRO_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_SYMBOL_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_TYPE_COMPILE_TESTS) \
|
||||
)
|
||||
@@ -42,6 +42,7 @@
|
||||
|
||||
#include <drm/drm_atomic.h>
|
||||
#include <drm/drm_atomic_helper.h>
|
||||
#include <drm/drm_edid.h>
|
||||
|
||||
static void nv_drm_connector_destroy(struct drm_connector *connector)
|
||||
{
|
||||
@@ -98,7 +99,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
break;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_OVERRIDE_EDID)
|
||||
if (connector->override_edid) {
|
||||
#else
|
||||
if (drm_edid_override_connector_update(connector) > 0) {
|
||||
#endif
|
||||
const struct drm_property_blob *edid = connector->edid_blob_ptr;
|
||||
|
||||
if (edid->length <= sizeof(pDetectParams->edid.buffer)) {
|
||||
@@ -118,6 +123,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
|
||||
drm_connector_attach_vrr_capable_property(&nv_connector->base);
|
||||
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
|
||||
#endif
|
||||
|
||||
if (pDetectParams->connected) {
|
||||
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -46,6 +46,35 @@
|
||||
#include <linux/nvhost.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
static int
|
||||
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
|
||||
struct drm_property_blob **blob,
|
||||
uint64_t blob_id,
|
||||
ssize_t expected_size)
|
||||
{
|
||||
struct drm_property_blob *new_blob = NULL;
|
||||
|
||||
if (blob_id != 0) {
|
||||
new_blob = drm_property_lookup_blob(dev, blob_id);
|
||||
if (new_blob == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((expected_size > 0) &&
|
||||
(new_blob->length != expected_size)) {
|
||||
drm_property_blob_put(new_blob);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
drm_property_replace_blob(blob, new_blob);
|
||||
drm_property_blob_put(new_blob);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nv_drm_plane_destroy(struct drm_plane *plane)
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
@@ -84,9 +113,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
cursor_req_config_disable(req_config);
|
||||
@@ -186,7 +212,6 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
int ret = 0;
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
plane_req_config_disable(req_config);
|
||||
@@ -309,6 +334,9 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
nv_plane->defaultCompositionMode;
|
||||
#endif
|
||||
|
||||
req_config->config.inputColorSpace =
|
||||
nv_drm_plane_state->input_colorspace;
|
||||
|
||||
req_config->config.syncptParams.preSyncptSpecified = false;
|
||||
req_config->config.syncptParams.postSyncptRequested = false;
|
||||
|
||||
@@ -320,10 +348,10 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
int ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -339,6 +367,60 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
|
||||
struct hdr_output_metadata *hdr_metadata =
|
||||
nv_drm_plane_state->hdr_output_metadata->data;
|
||||
struct hdr_metadata_infoframe *info_frame =
|
||||
&hdr_metadata->hdmi_metadata_type1;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
uint32_t i;
|
||||
|
||||
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].x =
|
||||
info_frame->display_primaries[i].x;
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].y =
|
||||
info_frame->display_primaries[i].y;
|
||||
}
|
||||
|
||||
req_config->config.hdrMetadata.whitePoint.x =
|
||||
info_frame->white_point.x;
|
||||
req_config->config.hdrMetadata.whitePoint.y =
|
||||
info_frame->white_point.y;
|
||||
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
|
||||
info_frame->max_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
|
||||
info_frame->min_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.maxCLL =
|
||||
info_frame->max_cll;
|
||||
req_config->config.hdrMetadata.maxFALL =
|
||||
info_frame->max_fall;
|
||||
|
||||
req_config->config.hdrMetadataSpecified = true;
|
||||
|
||||
switch (info_frame->eotf) {
|
||||
case HDMI_EOTF_SMPTE_ST2084:
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
|
||||
break;
|
||||
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
|
||||
req_config->config.tf =
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
|
||||
break;
|
||||
default:
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
req_config->config.hdrMetadataSpecified = false;
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Unconditionally mark the surface as changed, even if nothing changed,
|
||||
* so that we always get a flip event: a DRM client may flip with
|
||||
@@ -509,9 +591,21 @@ static int nv_drm_plane_atomic_set_property(
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
return nv_drm_atomic_replace_property_blob_from_id(
|
||||
nv_dev->dev,
|
||||
&nv_drm_plane_state->hdr_output_metadata,
|
||||
val,
|
||||
sizeof(struct hdr_output_metadata));
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int nv_drm_plane_atomic_get_property(
|
||||
@@ -521,12 +615,26 @@ static int nv_drm_plane_atomic_get_property(
|
||||
uint64_t *val)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
*val = nv_drm_plane_state->input_colorspace;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
*val = nv_drm_plane_state->hdr_output_metadata ?
|
||||
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct drm_plane_state *
|
||||
@@ -544,6 +652,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
|
||||
|
||||
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
|
||||
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
|
||||
if (nv_plane_state->hdr_output_metadata) {
|
||||
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
|
||||
}
|
||||
#endif
|
||||
|
||||
return &nv_plane_state->base;
|
||||
}
|
||||
@@ -557,6 +673,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
#else
|
||||
__drm_atomic_helper_plane_destroy_state(state);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(state);
|
||||
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void nv_drm_plane_atomic_destroy_state(
|
||||
@@ -803,7 +925,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
|
||||
};
|
||||
|
||||
static void nv_drm_plane_install_properties(
|
||||
struct drm_plane *plane)
|
||||
struct drm_plane *plane,
|
||||
NvBool supportsHDR)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
|
||||
@@ -811,6 +934,19 @@ static void nv_drm_plane_install_properties(
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_out_fence_property, 0);
|
||||
}
|
||||
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -990,7 +1126,9 @@ nv_drm_plane_create(struct drm_device *dev,
|
||||
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
|
||||
|
||||
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
|
||||
nv_drm_plane_install_properties(plane);
|
||||
nv_drm_plane_install_properties(
|
||||
plane,
|
||||
pResInfo->supportsHDR[layer_idx]);
|
||||
}
|
||||
|
||||
__nv_drm_plane_create_alpha_blending_properties(
|
||||
@@ -1141,11 +1279,13 @@ void nv_drm_enumerate_crtcs_and_planes(
|
||||
}
|
||||
|
||||
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
|
||||
struct drm_plane *overlay_plane = NULL;
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
|
||||
continue;
|
||||
}
|
||||
|
||||
struct drm_plane *overlay_plane =
|
||||
overlay_plane =
|
||||
nv_drm_plane_create(nv_dev->dev,
|
||||
DRM_PLANE_TYPE_OVERLAY,
|
||||
layer,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -205,6 +205,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
enum NvKmsInputColorSpace input_colorspace;
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
@@ -212,6 +216,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
|
||||
return container_of(state, struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
|
||||
{
|
||||
return container_of(state, const struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
|
||||
{
|
||||
if (crtc == NULL) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -86,6 +86,23 @@
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
|
||||
enum NvKmsInputColorSpace colorSpace)
|
||||
{
|
||||
switch (colorSpace) {
|
||||
case NVKMS_INPUT_COLORSPACE_NONE:
|
||||
return "None";
|
||||
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
|
||||
return "IEC 61966-2-2 linear FP";
|
||||
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
|
||||
return "ITU-R BT.2100-PQ YCbCr";
|
||||
default:
|
||||
/* We shouldn't hit this */
|
||||
WARN_ON("Unsupported input colorspace");
|
||||
return "None";
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
@@ -240,10 +257,6 @@ nv_drm_init_mode_config(struct nv_drm_device *nv_dev,
|
||||
dev->mode_config.preferred_depth = 24;
|
||||
dev->mode_config.prefer_shadow = 1;
|
||||
|
||||
/* Currently unused. Update when needed. */
|
||||
|
||||
dev->mode_config.fb_base = 0;
|
||||
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_ASYNC_FLIP) || \
|
||||
defined(NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS)
|
||||
dev->mode_config.async_page_flip = true;
|
||||
@@ -332,6 +345,15 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
@@ -345,6 +367,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_dev->nv_hdr_output_metadata_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_HDR_STATIC_METADATA", 0);
|
||||
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
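The hunk above registers a driver-specific NV_INPUT_COLORSPACE enum property (and, where available, an NV_HDR_STATIC_METADATA blob property) with the DRM device. As an illustration only, a userspace client could set such a plane property through the standard libdrm object-property call; the plane and property IDs below are runtime-discovered placeholders, not values defined by this driver.

```c
/*
 * Illustrative only: setting a driver-specific plane property such as
 * NV_INPUT_COLORSPACE from userspace via libdrm. The plane and property
 * IDs are discovered at runtime (e.g. with drmModeObjectGetProperties()
 * and drmModeGetProperty()); nothing here beyond the property name is
 * taken from the driver.
 */
#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_plane_enum_property(int drm_fd, uint32_t plane_id,
                                   uint32_t prop_id, uint64_t enum_value)
{
    /* DRM_MODE_OBJECT_PLANE identifies the object type plane_id refers to. */
    return drmModeObjectSetProperty(drm_fd, plane_id, DRM_MODE_OBJECT_PLANE,
                                    prop_id, enum_value);
}
```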
@@ -882,7 +921,9 @@ static void nv_drm_update_drm_driver_features(void)
|
||||
|
||||
nv_drm_driver.dumb_create = nv_drm_dumb_create;
|
||||
nv_drm_driver.dumb_map_offset = nv_drm_dumb_map_offset;
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
nv_drm_driver.dumb_destroy = nv_drm_dumb_destroy;
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
|
||||
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
|
||||
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
|
||||
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
|
||||
#if defined(DRM_FORMAT_ABGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
|
||||
#endif
|
||||
#if defined(DRM_FORMAT_XBGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
|
||||
#endif
|
||||
|
||||
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
|
||||
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,
|
||||
|
||||
@@ -95,7 +95,7 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
|
||||
pfn >>= PAGE_SHIFT;
|
||||
pfn += page_offset;
|
||||
} else {
|
||||
BUG_ON(page_offset > nv_nvkms_memory->pages_count);
|
||||
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
|
||||
}
|
||||
|
||||
@@ -201,7 +201,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
sg_table = nv_drm_prime_pages_to_sg(nv_dev->dev,
|
||||
@@ -285,11 +285,13 @@ int nv_drm_dumb_create(
|
||||
if (nv_dev->hasVideoMemory) {
|
||||
pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
|
||||
NvKmsSurfaceMemoryLayoutPitch,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
|
||||
args->size,
|
||||
&compressible);
|
||||
} else {
|
||||
pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
|
||||
NvKmsSurfaceMemoryLayoutPitch,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
|
||||
args->size,
|
||||
&compressible);
|
||||
}
|
||||
@@ -441,6 +443,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory = NULL;
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
enum NvKmsSurfaceMemoryLayout layout;
|
||||
enum NvKmsKapiAllocationType type;
|
||||
int ret = 0;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
@@ -449,6 +452,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
}
|
||||
|
||||
if (p->__pad != 0) {
|
||||
ret = -EINVAL;
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "non-zero value in padding field");
|
||||
goto failed;
|
||||
}
|
||||
@@ -461,15 +465,19 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
|
||||
layout = p->block_linear ?
|
||||
NvKmsSurfaceMemoryLayoutBlockLinear : NvKmsSurfaceMemoryLayoutPitch;
|
||||
type = (p->flags & NV_GEM_ALLOC_NO_SCANOUT) ?
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN : NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT;
|
||||
|
||||
if (nv_dev->hasVideoMemory) {
|
||||
pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
|
||||
layout,
|
||||
type,
|
||||
p->memory_size,
|
||||
&p->compressible);
|
||||
} else {
|
||||
pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
|
||||
layout,
|
||||
type,
|
||||
p->memory_size,
|
||||
&p->compressible);
|
||||
}
|
||||
@@ -575,11 +583,13 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
int nv_drm_dumb_destroy(struct drm_file *file,
|
||||
struct drm_device *dev,
|
||||
uint32_t handle)
|
||||
{
|
||||
return drm_gem_handle_delete(file, handle);
|
||||
}
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -97,9 +97,11 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
|
||||
struct drm_device *dev, uint32_t handle,
|
||||
uint64_t *offset);
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
int nv_drm_dumb_destroy(struct drm_file *file,
|
||||
struct drm_device *dev,
|
||||
uint32_t handle);
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
|
||||
struct drm_gem_object *nv_drm_gem_nvkms_prime_import(
|
||||
struct drm_device *dev,
|
||||
|
||||
@@ -92,9 +92,9 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vma->vm_flags &= ~VM_PFNMAP;
|
||||
vma->vm_flags &= ~VM_IO;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
nv_vm_flags_clear(vma, VM_PFNMAP);
|
||||
nv_vm_flags_clear(vma, VM_IO);
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
return 0;
|
||||
}
|
||||
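The mmap hunk above stops modifying vma->vm_flags directly and goes through nv_vm_flags_set()/nv_vm_flags_clear() wrappers; recent kernels only allow flag updates through the vm_flags_set()/vm_flags_clear() helpers (see the vm_area_struct_has_const_vm_flags conftest added elsewhere in this diff). A minimal sketch of how such a compatibility wrapper is typically written follows; the NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS macro name is an assumption standing in for whatever the conftest actually defines.

```c
/*
 * Sketch only: compatibility wrappers for setting/clearing VMA flags.
 * Kernels >= 6.3 require vm_flags_set()/vm_flags_clear(); older kernels
 * still allow direct modification of vma->vm_flags. The conftest macro
 * name below is assumed, not taken from the source.
 */
#include <linux/mm.h>

static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
    vm_flags_set(vma, flags);    /* kernel helper; also asserts mmap locking */
#else
    vma->vm_flags |= flags;
#endif
}

static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
    vm_flags_clear(vma, flags);
#else
    vma->vm_flags &= ~flags;
#endif
}
```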
@@ -112,8 +112,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset > nv_user_memory->pages_count);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
|
||||
@@ -299,7 +299,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
vma->vm_flags &= ~VM_MAYWRITE;
|
||||
nv_vm_flags_clear(vma, VM_MAYWRITE);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -199,6 +199,8 @@ struct drm_nvidia_gem_map_offset_params {
|
||||
uint64_t offset; /* OUT Fake offset */
|
||||
};
|
||||
|
||||
#define NV_GEM_ALLOC_NO_SCANOUT (1 << 0)
|
||||
|
||||
struct drm_nvidia_gem_alloc_nvkms_memory_params {
|
||||
uint32_t handle; /* OUT */
|
||||
uint8_t block_linear; /* IN */
|
||||
@@ -206,6 +208,7 @@ struct drm_nvidia_gem_alloc_nvkms_memory_params {
|
||||
uint16_t __pad;
|
||||
|
||||
uint64_t memory_size; /* IN */
|
||||
uint32_t flags; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_gem_export_dmabuf_memory_params {
|
||||
|
||||
@@ -47,6 +47,14 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
void *nv_drm_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
size_t total_size = nmemb * size;
|
||||
//
|
||||
// Check for overflow.
|
||||
//
|
||||
if ((nmemb != 0) && ((total_size / nmemb) != size))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return kzalloc(nmemb * size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
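The nv_drm_calloc() hunk above guards the nmemb * size multiplication against overflow before calling kzalloc(). For reference only, the same guarantee is provided by the kernel's own kcalloc() helper, which fails the allocation when the multiplication would overflow; a minimal sketch:

```c
/*
 * Sketch: overflow-safe array allocation using the in-kernel helper.
 * kcalloc(n, size, flags) returns NULL if n * size would overflow and
 * zeroes the memory, so no manual division check is needed.
 */
#include <linux/slab.h>

static void *alloc_zeroed_array(size_t nmemb, size_t size)
{
    return kcalloc(nmemb, size, GFP_KERNEL);
}
```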
@@ -93,8 +101,6 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
const int write = 1;
|
||||
const int force = 0;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
@@ -105,7 +111,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_GET_USER_PAGES(address, pages_count, write, force,
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
@@ -123,7 +129,7 @@ failed:
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
put_page(user_pages[i]);
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,8 +144,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
|
||||
for (i = 0; i < pages_count; i++) {
|
||||
set_page_dirty_lock(pages[i]);
|
||||
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
|
||||
nv_drm_free(pages);
|
||||
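The hunks above migrate nv-drm's user-page handling from the get_user_pages()/put_page() pair to the pin_user_pages()/unpin_user_page() API, which is what current kernels expect for pages pinned for device access. A compressed sketch of the pattern follows, with the caveat that the exact pin_user_pages() signature varies between kernel versions (which is why the driver hides it behind NV_PIN_USER_PAGES).

```c
/*
 * Sketch of pinning and releasing user pages on a recent kernel.
 * Signature details differ across kernel versions; treat this as
 * illustrative rather than a drop-in replacement.
 */
#include <linux/mm.h>
#include <linux/sched.h>

static long pin_pages_for_device(unsigned long addr, unsigned long nr_pages,
                                 struct page **pages)
{
    long pinned;

    mmap_read_lock(current->mm);
    pinned = pin_user_pages(addr, nr_pages, FOLL_WRITE, pages);
    mmap_read_unlock(current->mm);

    return pinned; /* may be fewer than nr_pages; caller must handle that */
}

static void release_pinned_pages(unsigned long nr_pages, struct page **pages)
{
    unsigned long i;

    for (i = 0; i < nr_pages; i++) {
        set_page_dirty_lock(pages[i]); /* pages may have been written by the device */
        unpin_user_page(pages[i]);
    }
}
```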
@@ -174,16 +179,7 @@ static void __exit nv_linux_drm_exit(void)
|
||||
module_init(nv_linux_drm_init);
|
||||
module_exit(nv_linux_drm_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
|
||||
to_nv_crtc_state(new_crtc_state);
|
||||
struct drm_plane_state *old_plane_state = NULL;
|
||||
struct drm_plane *plane = NULL;
|
||||
struct drm_plane *primary_plane = crtc->primary;
|
||||
bool primary_event = false;
|
||||
bool overlay_event = false;
|
||||
int i;
|
||||
|
||||
if (!old_crtc_state->active && !new_crtc_state->active) {
|
||||
@@ -134,16 +131,19 @@ static int __nv_drm_put_back_post_fence_fd(
|
||||
const struct NvKmsKapiLayerReplyConfig *layer_reply_config)
|
||||
{
|
||||
int fd = layer_reply_config->postSyncptFd;
|
||||
int ret = 0;
|
||||
|
||||
if ((fd >= 0) && (plane_state->fd_user_ptr != NULL)) {
|
||||
if (put_user(fd, plane_state->fd_user_ptr)) {
|
||||
return -EFAULT;
|
||||
ret = copy_to_user(plane_state->fd_user_ptr, &fd, sizeof(fd));
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*! Set back to NULL and let set_property specify it again */
|
||||
plane_state->fd_user_ptr = NULL;
|
||||
}
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __nv_drm_get_syncpt_data(
|
||||
@@ -274,6 +274,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
|
||||
nv_new_crtc_state->nv_flip = NULL;
|
||||
}
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
|
||||
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -347,6 +347,9 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
&nv_fence->lock, nv_fence_context->context,
|
||||
seqno);
|
||||
|
||||
/* The context maintains a reference to any pending fences. */
|
||||
nv_dma_fence_get(&nv_fence->base);
|
||||
|
||||
list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
|
||||
|
||||
nv_fence_context->last_seqno = seqno;
|
||||
@@ -512,6 +515,9 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
|
||||
nv_dma_resv_unlock(&nv_gem->resv);
|
||||
|
||||
/* dma_resv_add_excl_fence takes its own reference to the fence. */
|
||||
nv_dma_fence_put(fence);
|
||||
|
||||
fence_context_create_fence_failed:
|
||||
nv_drm_gem_object_unreference_unlocked(&nv_gem_fence_context->base);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -122,6 +122,11 @@ struct nv_drm_device {
|
||||
NvBool supportsSyncpts;
|
||||
|
||||
struct drm_property *nv_out_fence_property;
|
||||
struct drm_property *nv_input_colorspace_property;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
@@ -59,11 +59,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
@@ -100,6 +103,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
@@ -115,6 +119,10 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
||||
@@ -34,6 +34,9 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <acpi/video.h>
|
||||
|
||||
#include "nvstatus.h"
|
||||
|
||||
@@ -59,7 +62,7 @@
|
||||
|
||||
#define NVKMS_LOG_PREFIX "nvidia-modeset: "
|
||||
|
||||
static bool output_rounding_fix = false;
|
||||
static bool output_rounding_fix = true;
|
||||
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
|
||||
|
||||
/* These parameters are used for fault injection tests. Normally the defaults
|
||||
@@ -85,110 +88,6 @@ NvBool nvkms_output_rounding_fix(void)
|
||||
* NVKMS interface for nvhost unit for sync point APIs.
|
||||
*************************************************************************/
|
||||
#ifdef NVKMS_SYNCPT_STUBS_NEEDED
|
||||
/* Unsupported STUB for nvkms_syncpt APIs */
|
||||
NvBool nvkms_syncpt_op(
|
||||
@@ -284,7 +183,10 @@ static inline int nvkms_read_trylock_pm_lock(void)
|
||||
|
||||
static inline void nvkms_read_lock_pm_lock(void)
|
||||
{
|
||||
down_read(&nvkms_pm_lock);
|
||||
while (!down_read_trylock(&nvkms_pm_lock)) {
|
||||
try_to_freeze();
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nvkms_read_unlock_pm_lock(void)
|
||||
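The nvkms_read_lock_pm_lock() change above replaces a plain down_read() with a trylock loop, so a thread waiting on the power-management lock can still be frozen during suspend instead of blocking it. A sketch of the general pattern, with the lock renamed for illustration:

```c
/*
 * Sketch: acquiring a read lock without blocking the freezer.
 * Instead of sleeping uninterruptibly in down_read(), spin on the
 * trylock and give the freezer and scheduler a chance on each miss.
 */
#include <linux/rwsem.h>
#include <linux/freezer.h>
#include <linux/sched.h>

static DECLARE_RWSEM(example_pm_lock);

static void example_read_lock_freezable(void)
{
    while (!down_read_trylock(&example_pm_lock)) {
        try_to_freeze();   /* park here if a system freeze is in progress */
        cond_resched();    /* avoid monopolizing the CPU while retrying */
    }
}
```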
@@ -1060,6 +962,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
struct nvkms_backlight_device *nvkms_bd = NULL;
|
||||
int i;
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
|
||||
if (gpu_info == NULL) {
|
||||
return NULL;
|
||||
@@ -1182,7 +1090,7 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvkms_close_common(struct nvkms_per_open *popen)
|
||||
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
{
|
||||
/*
|
||||
* Don't use down_interruptible(): we need to free resources
|
||||
@@ -1220,13 +1128,13 @@ void nvkms_close_common(struct nvkms_per_open *popen)
|
||||
nvkms_free(popen, sizeof(*popen));
|
||||
}
|
||||
|
||||
static void nvkms_close_deferred(void *data)
|
||||
static void nvkms_close_pm_unlocked(void *data)
|
||||
{
|
||||
struct nvkms_per_open *popen = data;
|
||||
|
||||
nvkms_read_lock_pm_lock();
|
||||
|
||||
nvkms_close_common(popen);
|
||||
nvkms_close_pm_locked(popen);
|
||||
|
||||
nvkms_read_unlock_pm_lock();
|
||||
}
|
||||
@@ -1234,11 +1142,11 @@ static void nvkms_close_deferred(void *data)
|
||||
static void nvkms_close_popen(struct nvkms_per_open *popen)
|
||||
{
|
||||
if (nvkms_read_trylock_pm_lock() == 0) {
|
||||
nvkms_close_common(popen);
|
||||
nvkms_close_pm_locked(popen);
|
||||
nvkms_read_unlock_pm_lock();
|
||||
} else {
|
||||
nv_kthread_q_item_init(&popen->deferred_close_q_item,
|
||||
nvkms_close_deferred,
|
||||
nvkms_close_pm_unlocked,
|
||||
popen);
|
||||
nvkms_queue_work(&nvkms_deferred_close_kthread_q,
|
||||
&popen->deferred_close_q_item);
|
||||
@@ -1291,7 +1199,7 @@ struct nvkms_per_open* nvkms_open_from_kapi
|
||||
|
||||
void nvkms_close_from_kapi(struct nvkms_per_open *popen)
|
||||
{
|
||||
nvkms_close_popen(popen);
|
||||
nvkms_close_pm_unlocked(popen);
|
||||
}
|
||||
|
||||
NvBool nvkms_ioctl_from_kapi
|
||||
@@ -1450,29 +1358,7 @@ static void nvkms_proc_exit(void)
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
proc_remove(nvkms_proc_dir);
|
||||
#else
|
||||
/*
|
||||
* On kernel versions without proc_remove(), we need to explicitly
|
||||
* remove each proc file beneath nvkms_proc_dir.
|
||||
* nvkms_proc_init() only creates files directly under
|
||||
* nvkms_proc_dir, so those are the only files we need to remove
|
||||
* here: warn if there is any deeper directory nesting.
|
||||
*/
|
||||
{
|
||||
struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
|
||||
|
||||
while (entry != NULL) {
|
||||
struct proc_dir_entry *next = entry->next;
|
||||
WARN_ON(entry->subdir != NULL);
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
entry = next;
|
||||
}
|
||||
}
|
||||
|
||||
remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
|
||||
#endif /* NV_PROC_REMOVE_PRESENT */
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
}
|
||||
|
||||
@@ -1734,16 +1620,7 @@ restart:
|
||||
module_init(nvkms_init);
|
||||
module_exit(nvkms_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -85,15 +85,11 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -94,11 +94,10 @@ struct nvidia_p2p_params {
|
||||
} nvidia_p2p_params_t;
|
||||
|
||||
/*
|
||||
* Capability flag for users to detect
|
||||
* Macro for users to detect
|
||||
* driver support for persistent pages.
|
||||
*/
|
||||
extern int nvidia_p2p_cap_persistent_pages;
|
||||
#define NVIDIA_P2P_CAP_PERSISTENT_PAGES
|
||||
#define NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
|
||||
|
||||
/*
|
||||
* This API is not supported.
|
||||
@@ -173,15 +172,6 @@ struct nvidia_p2p_page_table {
|
||||
* A pointer to the function to be invoked when the pages
|
||||
* underlying the virtual address range are freed
|
||||
* implicitly.
|
||||
* If NULL, persistent pages will be returned.
|
||||
* This means the pages underlying the range of GPU virtual memory
|
||||
* will persist until explicitly freed by nvidia_p2p_put_pages().
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
|
||||
|
||||
|
||||
* MIG-enabled devices and vGPU.
|
||||
|
||||
* @param[in] data
|
||||
* A non-NULL opaque pointer to private data to be passed to the
|
||||
* callback function.
|
||||
@@ -194,12 +184,48 @@ struct nvidia_p2p_page_table {
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address, uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data), void *data);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Pin and make the pages underlying a range of GPU virtual memory
|
||||
* accessible to a third-party device. The pages will persist until
|
||||
* explicitly freed by nvidia_p2p_put_pages_persistent().
|
||||
*
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
* MIG-enabled devices and vGPU.
|
||||
*
|
||||
* This API only supports pinned, GPU-resident memory, such as that provided
|
||||
* by cudaMalloc().
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* Address must be aligned to the 64KB boundary.
|
||||
* @param[in] length
|
||||
* The length of the requested P2P mapping.
|
||||
* Length must be a multiple of 64KB.
|
||||
* @param[out] page_table
|
||||
* A pointer to an array of structures with P2P PTEs.
|
||||
* @param[in] flags
|
||||
* Must be set to zero for now.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -ENOTSUPP if the requested operation is not supported.
|
||||
* -ENOMEM if the driver failed to allocate memory or if
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages_persistent(uint64_t virtual_address,
|
||||
uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data),
|
||||
void *data);
|
||||
uint32_t flags);
|
||||
|
||||
#define NVIDIA_P2P_DMA_MAPPING_VERSION 0x00020003
|
||||
|
||||
@@ -272,6 +298,8 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
* Release a set of pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] p2p_token
|
||||
* A token that uniquely identifies the P2P mapping.
|
||||
* @param[in] va_space
|
||||
@@ -286,10 +314,33 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token,
|
||||
uint32_t va_space, uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Release a set of persistent pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* @param[in] page_table
|
||||
* A pointer to the array of structures with P2P PTEs.
|
||||
* @param[in] flags
|
||||
* Must be set to zero for now.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages_persistent(uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table,
|
||||
uint32_t flags);
|
||||
|
||||
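Together, the two additions above give third-party kernel modules a persistent-pages variant of the P2P interface: nvidia_p2p_get_pages_persistent() pins GPU memory without a free callback, and nvidia_p2p_put_pages_persistent() releases it. A minimal sketch of the intended calling sequence, under the constraints stated in the comments above (64KB-aligned address and length, flags currently zero); the header name and surrounding module boilerplate are assumptions.

```c
/*
 * Sketch: pinning and releasing GPU memory through the persistent P2P API.
 * virtual_address/length must follow the 64KB alignment rules documented
 * above; error handling is reduced to the bare minimum.
 */
#include <linux/types.h>
#include "nv-p2p.h"   /* assumed header exporting the nvidia_p2p_* interface */

static struct nvidia_p2p_page_table *pin_gpu_range(uint64_t va, uint64_t len)
{
    struct nvidia_p2p_page_table *page_table = NULL;

    if (nvidia_p2p_get_pages_persistent(va, len, &page_table, 0) != 0)
        return NULL;

    return page_table;
}

static void unpin_gpu_range(uint64_t va, struct nvidia_p2p_page_table *page_table)
{
    nvidia_p2p_put_pages_persistent(va, page_table, 0);
}
```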
/*
|
||||
* @brief
|
||||
* Free a third-party P2P page table. (This function is a no-op.)
|
||||
|
||||
@@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
|
||||
# MOFED's Module.symvers is needed for the build
|
||||
# to find the additional ib_* symbols.
|
||||
#
|
||||
# Also, MOFED doesn't use kbuild ARCH names.
|
||||
# So adapt OFA_ARCH to match MOFED's conventions.
|
||||
#
|
||||
ifeq ($(ARCH), arm64)
|
||||
OFA_ARCH := aarch64
|
||||
else ifeq ($(ARCH), powerpc)
|
||||
OFA_ARCH := ppc64le
|
||||
else
|
||||
OFA_ARCH := $(ARCH)
|
||||
endif
|
||||
OFA_DIR := /usr/src/ofa_kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
|
||||
if [ -d "$$d" ]; then \
|
||||
echo "$$d"; \
|
||||
|
||||
@@ -284,8 +284,9 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
static void nv_mem_put_pages_common(int nc,
|
||||
struct sg_table *sg_head,
|
||||
void *context)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
@@ -302,8 +303,13 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
if (nv_mem_context->callback_task == current)
|
||||
return;
|
||||
|
||||
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table);
|
||||
if (nc) {
|
||||
ret = nvidia_p2p_put_pages_persistent(nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table, 0);
|
||||
} else {
|
||||
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table);
|
||||
}
|
||||
|
||||
#ifdef _DEBUG_ONLY_
|
||||
/* Here we expect an error in real life cases that should be ignored - not printed.
|
||||
@@ -318,6 +324,16 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
return;
|
||||
}
|
||||
|
||||
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
{
|
||||
nv_mem_put_pages_common(0, sg_head, context);
|
||||
}
|
||||
|
||||
static void nv_mem_put_pages_nc(struct sg_table *sg_head, void *context)
|
||||
{
|
||||
nv_mem_put_pages_common(1, sg_head, context);
|
||||
}
|
||||
|
||||
static void nv_mem_release(void *context)
|
||||
{
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
@@ -396,8 +412,9 @@ static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
nv_mem_context->core_context = core_context;
|
||||
nv_mem_context->page_size = GPU_PAGE_SIZE;
|
||||
|
||||
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, NULL, NULL);
|
||||
ret = nvidia_p2p_get_pages_persistent(nv_mem_context->page_virt_start,
|
||||
nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, 0);
|
||||
if (ret < 0) {
|
||||
peer_err("error %d while calling nvidia_p2p_get_pages() with NULL callback\n", ret);
|
||||
return ret;
|
||||
@@ -407,13 +424,13 @@ static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
}
|
||||
|
||||
static struct peer_memory_client nv_mem_client_nc = {
|
||||
.acquire = nv_mem_acquire,
|
||||
.get_pages = nv_mem_get_pages_nc,
|
||||
.dma_map = nv_dma_map,
|
||||
.dma_unmap = nv_dma_unmap,
|
||||
.put_pages = nv_mem_put_pages,
|
||||
.get_page_size = nv_mem_get_page_size,
|
||||
.release = nv_mem_release,
|
||||
.acquire = nv_mem_acquire,
|
||||
.get_pages = nv_mem_get_pages_nc,
|
||||
.dma_map = nv_dma_map,
|
||||
.dma_unmap = nv_dma_unmap,
|
||||
.put_pages = nv_mem_put_pages_nc,
|
||||
.get_page_size = nv_mem_get_page_size,
|
||||
.release = nv_mem_release,
|
||||
};
|
||||
|
||||
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
|
||||
@@ -477,9 +494,6 @@ static int __init nv_mem_client_init(void)
|
||||
}
|
||||
|
||||
// The nc client enables support for persistent pages.
|
||||
// Thanks to this check, nvidia-peermem requires the new symbol from nvidia.ko, which
|
||||
// prevents users from unintentionally loading this module with an unsupported nvidia.ko.
|
||||
BUG_ON(!nvidia_p2p_cap_persistent_pages);
|
||||
strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
|
||||
strcpy(nv_mem_client_nc.version, DRV_VERSION);
|
||||
reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -116,6 +116,14 @@ extern "C" {
|
||||
#define NVA16F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVA16F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVA16F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVA16F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVA16F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVA16F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVA16F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVA16F_DMA_METHOD_ADDRESS 11:0
|
||||
|
||||
kernel-open/nvidia-uvm/clc86f.h (new file, 375 lines)
@@ -0,0 +1,375 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef _clc86f_h_
|
||||
#define _clc86f_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class HOPPER_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
* Note there is no .mfs file for this class.
|
||||
*/
|
||||
#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)
|
||||
|
||||
#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct Nvc86fControl_struct {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} Nvc86fControl, HopperAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVC86F_SET_OBJECT (0x00000000)
|
||||
#define NVC86F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVC86F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVC86F_ILLEGAL (0x00000004)
|
||||
#define NVC86F_ILLEGAL_HANDLE 31:0
|
||||
#define NVC86F_NOP (0x00000008)
|
||||
#define NVC86F_NOP_HANDLE 31:0
|
||||
#define NVC86F_SEMAPHOREA (0x00000010)
|
||||
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
|
||||
#define NVC86F_SEMAPHOREB (0x00000014)
|
||||
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
|
||||
#define NVC86F_SEMAPHOREC (0x00000018)
|
||||
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
|
||||
#define NVC86F_SEMAPHORED (0x0000001C)
|
||||
#define NVC86F_SEMAPHORED_OPERATION 4:0
|
||||
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
|
||||
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
|
||||
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
|
||||
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
|
||||
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
|
||||
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
|
||||
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
|
||||
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
|
||||
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
|
||||
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
|
||||
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
|
||||
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
|
||||
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
|
||||
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
|
||||
#define NVC86F_SEMAPHORED_REDUCTION 30:27
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
|
||||
#define NVC86F_SEMAPHORED_FORMAT 31:31
|
||||
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
|
||||
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
|
||||
#define NVC86F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
|
||||
// specifying the page address for a targeted TLB invalidate and the uTLB for
|
||||
// a targeted REPLAY_CANCEL for UVM.
|
||||
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
|
||||
// rearranged fields.
|
||||
#define NVC86F_MEM_OP_A (0x00000028)
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
|
||||
#define NVC86F_MEM_OP_B (0x0000002c)
|
||||
#define NVC86F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
|
||||
#define NVC86F_MEM_OP_C (0x00000030)
|
||||
#define NVC86F_MEM_OP_C_MEMBAR_TYPE 2:0
|
||||
#define NVC86F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
|
||||
#define NVC86F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
|
||||
#define NVC86F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
|
||||
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
|
||||
#define NVC86F_MEM_OP_D (0x00000034)
|
||||
#define NVC86F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
|
||||
#define NVC86F_MEM_OP_D_OPERATION 31:27
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
|
||||
// CLEAN_LINES is an alias for Tegra/GPU IP usage
|
||||
#define NVC86F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
|
||||
#define NVC86F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
|
||||
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
|
||||
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
|
||||
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
|
||||
#define NVC86F_SET_REFERENCE (0x00000050)
|
||||
#define NVC86F_SET_REFERENCE_COUNT 31:0
|
||||
#define NVC86F_SEM_ADDR_LO (0x0000005c)
|
||||
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
|
||||
#define NVC86F_SEM_ADDR_HI (0x00000060)
|
||||
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
|
||||
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
|
||||
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
|
||||
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
|
||||
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
|
||||
#define NVC86F_SEM_EXECUTE (0x0000006c)
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
|
||||
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
|
||||
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
|
||||
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC86F_WFI (0x00000078)
|
||||
#define NVC86F_WFI_SCOPE 0:0
|
||||
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
|
||||
#define NVC86F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVC86F_YIELD (0x00000080)
|
||||
#define NVC86F_YIELD_OP 1:0
|
||||
#define NVC86F_YIELD_OP_NOP 0x00000000
|
||||
#define NVC86F_YIELD_OP_TSG 0x00000003
|
||||
#define NVC86F_CLEAR_FAULTED (0x00000084)
|
||||
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
|
||||
// are intentionally not exposed to the driver through these defines.
|
||||
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
|
||||
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
|
||||
#define NVC86F_QUADRO_VERIFY (0x000000a0)
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVC86F_GP_ENTRY__SIZE 8
|
||||
#define NVC86F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVC86F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVC86F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVC86F_GP_ENTRY0_GET 31:2
|
||||
#define NVC86F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC86F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND 24:8
|
||||
#define NVC86F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC86F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC86F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC86F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC86F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVC86F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVC86F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVC86F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVC86F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
|
||||
|
||||
/* dma method formats */
|
||||
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVC86F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_TERT_OP 17:16
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVC86F_DMA_METHOD_COUNT 28:16
|
||||
#define NVC86F_DMA_IMMD_DATA 28:16
|
||||
#define NVC86F_DMA_SEC_OP 31:29
|
||||
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVC86F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_INCR_COUNT 28:16
|
||||
#define NVC86F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC86F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
|
||||
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC86F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC86F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
|
||||
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC86F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVC86F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVC86F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_IMMD_DATA 28:16
|
||||
#define NVC86F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVC86F_DMA_ADDRESS 12:2
|
||||
#define NVC86F_DMA_SUBCH 15:13
|
||||
#define NVC86F_DMA_OPCODE3 17:16
|
||||
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVC86F_DMA_COUNT 28:18
|
||||
#define NVC86F_DMA_OPCODE 31:29
|
||||
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVC86F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clc86f_h_ */
|
||||
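clc86f.h above describes the Hopper GPFIFO channel: the USERD control block (Put/Get/GPPut/GPGet) and the 8-byte GP entry whose bitfields are given by the NVC86F_GP_ENTRY* defines (pushbuffer address split across ENTRY0_GET 31:2 and ENTRY1_GET_HI 7:0, length in 32-bit words in ENTRY1_LENGTH 30:10). As a rough sketch of how a submission path might pack such an entry and advance GPPut; the field placement is taken from the defines above, but the helper shape, ring bookkeeping and the omitted barrier/doorbell step are assumptions, not part of this header.

```c
/*
 * Rough sketch: packing one GP entry from the NVC86F_GP_ENTRY* layout and
 * publishing it through the USERD GPPut pointer. Bit positions follow the
 * defines above; the barriers and doorbell write a real submission path
 * needs are deliberately omitted.
 */
#include "nvtypes.h"
#include "clc86f.h"

static void push_gpfifo_entry(volatile NvU64 *gpfifo_ring, NvU32 ring_entries,
                              Nvc86fControl *userd,
                              NvU64 pushbuf_gpu_va, NvU32 length_dwords)
{
    /* GP_ENTRY0_GET (31:2) holds pushbuffer address bits 31:2,
     * GP_ENTRY1_GET_HI (7:0) holds address bits 39:32,
     * GP_ENTRY1_LENGTH (30:10) holds the pushbuffer length in dwords. */
    NvU32 entry0 = (NvU32)pushbuf_gpu_va & ~0x3U;
    NvU32 entry1 = ((NvU32)(pushbuf_gpu_va >> 32) & 0xFFU) |
                   (length_dwords << 10);

    NvU32 put = userd->GPPut;

    gpfifo_ring[put] = ((NvU64)entry1 << 32) | entry0;

    /* Advance the put pointer (counted in entries, not bytes). */
    userd->GPPut = (put + 1) % ring_entries;
}
```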
kernel-open/nvidia-uvm/clc8b5.h (new file, 430 lines)
@@ -0,0 +1,430 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 1993-2004 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clc8b5_h_
|
||||
#define _clc8b5_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HOPPER_DMA_COPY_A (0x0000C8B5)
|
||||
|
||||
typedef volatile struct _clc8b5_tag0 {
|
||||
NvV32 Reserved00[0x40];
|
||||
NvV32 Nop; // 0x00000100 - 0x00000103
|
||||
NvV32 Reserved01[0xF];
|
||||
NvV32 PmTrigger; // 0x00000140 - 0x00000143
|
||||
NvV32 Reserved02[0x36];
|
||||
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
|
||||
NvV32 Reserved03[0x6];
|
||||
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
|
||||
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
|
||||
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
|
||||
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
|
||||
NvV32 Reserved04[0x1];
|
||||
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
|
||||
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
|
||||
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
|
||||
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
|
||||
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
|
||||
NvV32 Reserved05[0x26];
|
||||
NvV32 LaunchDma; // 0x00000300 - 0x00000303
|
||||
NvV32 Reserved06[0x3F];
|
||||
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
|
||||
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
|
||||
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
|
||||
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
|
||||
NvV32 PitchIn; // 0x00000410 - 0x00000413
|
||||
NvV32 PitchOut; // 0x00000414 - 0x00000417
|
||||
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
|
||||
NvV32 LineCount; // 0x0000041C - 0x0000041F
|
||||
NvV32 Reserved07[0x38];
|
||||
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
|
||||
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
|
||||
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
|
||||
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
|
||||
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
|
||||
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
|
||||
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
|
||||
NvV32 Reserved08[0x5];
|
||||
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
|
||||
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
|
||||
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
|
||||
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
|
||||
NvV32 Reserved09[0x6F];
|
||||
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
|
||||
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
|
||||
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
|
||||
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
|
||||
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
|
||||
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
|
||||
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
|
||||
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
|
||||
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
|
||||
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
|
||||
NvV32 Reserved10[0x1];
|
||||
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
|
||||
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
|
||||
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
|
||||
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
|
||||
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
|
||||
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
|
||||
NvV32 Reserved11[0x1];
|
||||
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
|
||||
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
|
||||
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
|
||||
NvV32 DstOriginY; // 0x00000750 - 0x00000753
|
||||
NvV32 Reserved12[0x270];
|
||||
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
|
||||
NvV32 Reserved13[0x3BA];
|
||||
} hopper_dma_copy_aControlPio;
|
||||
|
||||
#define NVC8B5_NOP (0x00000100)
|
||||
#define NVC8B5_NOP_PARAMETER 31:0
|
||||
#define NVC8B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC8B5_PM_TRIGGER_V 31:0
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
|
||||
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
|
||||
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
|
||||
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
|
||||
#define NVC8B5_LAUNCH_DMA (0x00000300)
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
|
||||
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
|
||||
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
|
||||
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
|
||||
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
|
||||
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
|
||||
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
|
||||
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
|
||||
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
|
||||
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
|
||||
#define NVC8B5_OFFSET_IN_LOWER_VALUE 31:0
|
||||
#define NVC8B5_OFFSET_OUT_UPPER (0x00000408)
|
||||
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
|
||||
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
|
||||
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
|
||||
#define NVC8B5_PITCH_IN (0x00000410)
|
||||
#define NVC8B5_PITCH_IN_VALUE 31:0
|
||||
#define NVC8B5_PITCH_OUT (0x00000414)
|
||||
#define NVC8B5_PITCH_OUT_VALUE 31:0
|
||||
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
|
||||
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
|
||||
#define NVC8B5_LINE_COUNT (0x0000041C)
|
||||
#define NVC8B5_LINE_COUNT_VALUE 31:0
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
|
||||
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
|
||||
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
|
||||
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
|
||||
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
|
||||
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
|
||||
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
|
||||
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
|
||||
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
|
||||
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
|
||||
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
|
||||
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC8B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC8B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC8B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC8B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC8B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC8B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC8B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC8B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC8B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC8B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC8B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC8B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC8B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC8B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clc8b5_h
|
||||
|
||||
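HOPPER_DMA_COPY_A transfers are described by the Offset/Pitch/LineLengthIn/LineCount methods and kicked off by LaunchDma, whose bit fields select the transfer type, flush/semaphore/interrupt behaviour, memory layout, and whether the source and destination addresses are virtual or physical. Below is a minimal sketch of composing a LaunchDma value for a plain pitch-linear, virtual-to-virtual, non-pipelined copy; open-coded shifts stand in for the driver's HW/DRF macros, and this is not the UVM driver's actual push sequence.

```c
#include <stdint.h>

/* Sketch only: NVC8B5_LAUNCH_DMA_* fields for a simple memcpy-style launch. */
static inline uint32_t nvc8b5_launch_simple_copy(void)
{
    return (0x2u << 0)    /* DATA_TRANSFER_TYPE 1:0   = NON_PIPELINED */
         | (0x1u << 2)    /* FLUSH_ENABLE       2:2   = TRUE          */
         | (0x0u << 3)    /* SEMAPHORE_TYPE     4:3   = NONE          */
         | (0x0u << 5)    /* INTERRUPT_TYPE     6:5   = NONE          */
         | (0x1u << 7)    /* SRC_MEMORY_LAYOUT  7:7   = PITCH         */
         | (0x1u << 8)    /* DST_MEMORY_LAYOUT  8:8   = PITCH         */
         | (0x0u << 12)   /* SRC_TYPE           12:12 = VIRTUAL       */
         | (0x0u << 13);  /* DST_TYPE           13:13 = VIRTUAL       */
}
```

The remap, memory-scrub, secure-copy and PLC-disable fields are left at zero here, i.e. at their FALSE/DEFAULT encodings.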
@@ -32,15 +32,8 @@
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100 (0x00000140)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100 (0x00000160)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100 (0x00000170)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100 (0x00000180)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100 (0x00000190)

/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)
508
kernel-open/nvidia-uvm/hwref/hopper/gh100/dev_fault.h
Normal file
@@ -0,0 +1,508 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2003-2016 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef __gh100_dev_fault_h__
|
||||
#define __gh100_dev_fault_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_PFAULT /* ----G */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 384 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_IFB 55 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_FLA 4 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1 256 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2 320 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_SEC 6 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_FSP 7 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF 10 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF0 10 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF1 11 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF2 12 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF3 13 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF4 14 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF5 15 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF6 16 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF7 17 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF8 18 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC 19 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 19 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 20 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 21 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC3 22 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC4 23 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC5 24 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC6 25 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC7 26 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 27 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG1 28 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG2 29 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG3 30 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG4 31 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG5 32 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG6 33 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG7 34 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 43 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE0 43 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE1 44 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE2 45 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE3 46 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE4 47 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE5 48 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE6 49 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE7 50 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE8 51 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE9 52 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 5 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC0 35 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC1 36 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC2 37 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_OFA0 53 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 56 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST0 64 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST1 65 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST2 66 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST3 67 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST4 68 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST5 69 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST6 70 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST7 71 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST8 72 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST9 73 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST10 74 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST11 75 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST12 76 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST13 77 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST14 78 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST15 79 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST16 80 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST17 81 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST18 82 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST19 83 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST20 84 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST21 85 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST22 86 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST23 87 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST24 88 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST25 89 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST26 90 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST27 91 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST28 92 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST29 93 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST30 94 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST31 95 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST32 96 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST33 97 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST34 98 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST35 99 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST36 100 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST37 101 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST38 102 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST39 103 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST40 104 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST41 105 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST42 106 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST43 107 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST44 108 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 256 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 257 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 258 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 259 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 260 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 261 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 262 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 263 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 264 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 265 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 266 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 267 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 268 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 269 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 270 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 271 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 272 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 273 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 274 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 275 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 276 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 277 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 278 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 279 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 280 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 281 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 282 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 283 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 284 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 285 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 286 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 287 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 288 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 289 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 290 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 291 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 292 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 293 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 294 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 295 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 296 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 297 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 298 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 299 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 300 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 301 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 302 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 303 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 304 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 305 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 306 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 307 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 308 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 309 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 310 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 311 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 312 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 313 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 314 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 315 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 316 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 317 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 318 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 319 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 320 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 321 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 322 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 323 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 324 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 325 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 326 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 327 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 328 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 329 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 330 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 331 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 332 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 333 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 334 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 335 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 336 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 337 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 338 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 339 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 340 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 341 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 342 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 343 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 344 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 345 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 346 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 347 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 348 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 349 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 350 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 351 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 352 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 353 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 354 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 355 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 356 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 357 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 358 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 359 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 360 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 361 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 362 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 363 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 364 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 365 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 366 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 367 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 368 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 369 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 370 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 371 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 372 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 373 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 374 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 375 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 376 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 377 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 378 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 379 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 380 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 381 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 382 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 383 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
|
||||
#define NV_PFAULT_CLIENT 14:8 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_0 0x00000070 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_1 0x00000071 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_2 0x00000072 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_3 0x00000073 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DISPNISO 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE0 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS0 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE3 0x0000000C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ACTRS 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PERF0 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED0 0x0000001D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC3 0x0000003D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC4 0x0000003E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_OFA0 0x0000003F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE10 0x00000040 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE11 0x00000041 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE12 0x00000042 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE13 0x00000043 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE14 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE15 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X9 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X10 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X11 0x00000049 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X12 0x0000004A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X13 0x0000004B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X14 0x0000004C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X15 0x0000004D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE1 0x0000004E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE2 0x0000004F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE3 0x00000050 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE4 0x00000051 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE5 0x00000052 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE6 0x00000053 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE7 0x00000054 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS1 0x00000055 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS2 0x00000056 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS3 0x00000057 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS4 0x00000058 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS5 0x00000059 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS6 0x0000005A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS7 0x0000005B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED1 0x0000005C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED2 0x0000005D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED3 0x0000005E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED4 0x0000005F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED5 0x00000060 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED6 0x00000061 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED7 0x00000062 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC 0x00000063 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC5 0x0000006F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC6 0x00000070 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC7 0x00000071 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG1 0x00000072 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG2 0x00000073 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG3 0x00000074 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG4 0x00000075 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG5 0x00000076 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG6 0x00000077 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG7 0x00000078 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FSP 0x00000079 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
|
||||
#define NV_PFAULT_GPC_ID 28:24 /* */
|
||||
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
|
||||
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
|
||||
#define NV_PFAULT_VALID 31:31 /* */
|
||||
#endif // __gh100_dev_fault_h__
|
||||
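The NV_PFAULT_* ranges above describe how a fault record encodes which MMU engine and client faulted, the fault type, and the kind of access that triggered it. As a sketch only (the extraction macro below is hypothetical, and the UVM driver reads these fields through its own register macros), decoding a 32-bit fault-info word might look like this:

```c
#include <stdint.h>

/* Hypothetical helper: extract bits hi:lo from a fault-info word. */
#define PFAULT_FIELD(word, hi, lo) (((word) >> (lo)) & ((1u << ((hi) - (lo) + 1)) - 1u))

typedef struct {
    uint32_t fault_type;   /* NV_PFAULT_FAULT_TYPE      4:0                  */
    uint32_t client;       /* NV_PFAULT_CLIENT          14:8                 */
    uint32_t access_type;  /* NV_PFAULT_ACCESS_TYPE     19:16                */
    uint32_t client_type;  /* NV_PFAULT_MMU_CLIENT_TYPE 20:20 (GPC=0, HUB=1) */
    uint32_t gpc_id;       /* NV_PFAULT_GPC_ID          28:24                */
    uint32_t valid;        /* NV_PFAULT_VALID           31:31                */
} pfault_info_t;

static inline pfault_info_t pfault_decode(uint32_t info)
{
    pfault_info_t f;
    f.fault_type  = PFAULT_FIELD(info, 4, 0);
    f.client      = PFAULT_FIELD(info, 14, 8);
    f.access_type = PFAULT_FIELD(info, 19, 16);
    f.client_type = PFAULT_FIELD(info, 20, 20);
    f.gpc_id      = PFAULT_FIELD(info, 28, 24);
    f.valid       = PFAULT_FIELD(info, 31, 31);
    return f;
}
```

Whether all of these fields live in a single 32-bit word of the fault entry is itself an assumption of this sketch; the header only fixes the bit ranges.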
560
kernel-open/nvidia-uvm/hwref/hopper/gh100/dev_mmu.h
Normal file
@@ -0,0 +1,560 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/


#ifndef __gh100_dev_mmu_h__
#define __gh100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE                                                        /* ----G */
#define NV_MMU_PDE_APERTURE_BIG                       (0*32+1):(0*32+0)   /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID               0x00000000          /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY          0x00000001          /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002         /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003     /* RW--V */
#define NV_MMU_PDE_SIZE                               (0*32+3):(0*32+2)   /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL                          0x00000000          /* RW--V */
#define NV_MMU_PDE_SIZE_HALF                          0x00000001          /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER                       0x00000002          /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH                        0x00000003          /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS                    (0*32+31):(0*32+4)  /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID                    (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER               (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0             0x00000000          /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL                     (1*32+1):(1*32+0)   /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID             0x00000000          /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY        0x00000001          /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002       /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003   /* RW--V */
#define NV_MMU_PDE_VOL_SMALL                          (1*32+2):(1*32+2)   /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE                     0x00000001          /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE                    0x00000000          /* RW--V */
#define NV_MMU_PDE_VOL_BIG                            (1*32+3):(1*32+3)   /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE                       0x00000001          /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE                      0x00000000          /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS                  (1*32+31):(1*32+4)  /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID                  (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER             (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0           0x00000000          /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT                      0x0000000c          /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE                                                        /* ----G */
#define NV_MMU_PTE_VALID                              (0*32+0):(0*32+0)   /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE                         0x1                 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE                        0x0                 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE                          (0*32+1):(0*32+1)   /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE                     0x1                 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE                    0x0                 /* RW--V */
#define NV_MMU_PTE_READ_ONLY                          (0*32+2):(0*32+2)   /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE                     0x1                 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE                    0x0                 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED                          (0*32+3):(0*32+3)   /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE                     0x00000001          /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE                    0x00000000          /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS                        (0*32+31):(0*32+4)  /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID                        (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER                   (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0                 0x00000000          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1                 0x00000001          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2                 0x00000002          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3                 0x00000003          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4                 0x00000004          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5                 0x00000005          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6                 0x00000006          /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7                 0x00000007          /* RW--V */
#define NV_MMU_PTE_VOL                                (1*32+0):(1*32+0)   /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE                           0x00000001          /* RW--V */
#define NV_MMU_PTE_VOL_FALSE                          0x00000000          /* RW--V */
#define NV_MMU_PTE_APERTURE                           (1*32+2):(1*32+1)   /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY              0x00000000          /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY               0x00000001          /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY    0x00000002          /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003         /* RW--V */
#define NV_MMU_PTE_LOCK                               (1*32+3):(1*32+3)   /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE                          0x1                 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE                         0x0                 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE                     (1*32+3):(1*32+3)   /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE                0x1                 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE               0x0                 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE                        (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE                       (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE                  0x1                 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE                 0x0                 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PTE__SIZE 8
|
||||
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
|
||||
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
|
||||
#define NV_MMU_VER1_PDE /* ----G */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PDE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE /* ----G */
|
||||
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PTE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_NEW_PDE /* ----G */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PDE__SIZE 8
|
||||
#define NV_MMU_NEW_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_NEW_PTE /* ----G */
|
||||
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PTE__SIZE 8
|
||||
#define NV_MMU_VER2_PDE /* ----G */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PDE__SIZE 8
|
||||
#define NV_MMU_VER2_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER2_PTE /* ----G */
|
||||
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PTE__SIZE 8
|
||||
#define NV_MMU_VER3_PDE /* ----G */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PDE__SIZE 8
|
||||
#define NV_MMU_VER3_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER3_PTE /* ----G */
|
||||
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PTE__SIZE 8
|
||||
#define NV_MMU_CLIENT /* ----G */
|
||||
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
|
||||
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
|
||||
#endif // __gh100_dev_mmu_h__
|
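Taken together, the VER3 definitions above describe an 8-byte PTE whose low bits carry VALID (0:0), APERTURE (2:1), PCF (7:3) and KIND (11:8), with ADDRESS in bits 51:12. The listed PCF values for valid mappings are also consistent with a simple flag encoding: bit 0 = uncached, bit 1 = privileged, bit 2 = read-only, bit 3 = atomics disabled, bit 4 = access counters disabled (ACD). A minimal sketch building on those observations; the helper names are assumptions, not driver code, and ADDRESS is assumed to hold the 4 KiB-aligned physical address:

#include <stdint.h>

/* Compose a VER3 PTE PCF value for a valid mapping from boolean attributes;
 * all bits clear corresponds to NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE. */
static unsigned ver3_pte_pcf(int uncached, int privileged, int read_only,
                             int atomics_disabled, int access_counters_disabled)
{
    return (uncached                 ? 1u << 0 : 0u) |
           (privileged               ? 1u << 1 : 0u) |
           (read_only                ? 1u << 2 : 0u) |
           (atomics_disabled         ? 1u << 3 : 0u) |
           (access_counters_disabled ? 1u << 4 : 0u);
}

/* Pack an 8-byte VER3 PTE from its fields, mirroring the ranges above. */
static uint64_t ver3_pte_pack(uint64_t phys_addr, unsigned aperture,
                              unsigned pcf, unsigned kind)
{
    uint64_t pte = 0;

    pte |= 1ull;                                  /* VALID     0:0   */
    pte |= (uint64_t)(aperture & 0x3)  << 1;      /* APERTURE  2:1   */
    pte |= (uint64_t)(pcf      & 0x1F) << 3;      /* PCF       7:3   */
    pte |= (uint64_t)(kind     & 0xF)  << 8;      /* KIND      11:8  */
    pte |= phys_addr & 0x000FFFFFFFFFF000ull;     /* ADDRESS   51:12 */

    return pte;
}

For instance, ver3_pte_pack(addr, 0x1 /* VIDEO_MEMORY */, ver3_pte_pcf(0, 0, 0, 0, 0), kind) would describe a valid, cached, read-write vidmem mapping under these assumptions.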
||||
@@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
|
||||
int result, node;
|
||||
nv_kthread_q_t local_q;
|
||||
|
||||
// If the API does not support CPU affinity, check whether the correct
|
||||
// error code is returned.
|
||||
// Non-affinitized queue allocation has been verified by a previous test,
|
||||
// so just ensure that the affinitized version also works.
|
||||
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
|
||||
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
|
||||
TEST_CHECK_RET(result == -ENOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for_each_online_node(node) {
|
||||
unsigned i;
|
||||
const unsigned max_i = 100;
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
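Based on the paths exercised above, a minimal usage sketch (the wrapper and queue name are illustrative, and it assumes the unaffinitized path, preferred node NUMA_NO_NODE, is always available via the plain kthread_create() branch):

/* Illustrative only, not driver code. */
static int example_start_queue(nv_kthread_q_t *q, int preferred_node)
{
    /* Avoid -ENOTSUPP from the affinitized path on kernels without support. */
    if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY())
        preferred_node = NUMA_NO_NODE;

    return nv_kthread_q_init_on_node(q, "example_q", preferred_node);
    /* The queue is later torn down with nv_kthread_q_stop(q). */
}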
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
|
||||
@@ -67,17 +67,11 @@ endif
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
@@ -88,17 +82,16 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
@@ -108,5 +101,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -41,73 +41,6 @@
|
||||
static dev_t g_uvm_base_dev;
|
||||
static struct cdev g_uvm_cdev;
|
||||
|
||||
// List of fault service contexts for CPU faults
|
||||
static LIST_HEAD(g_cpu_service_block_context_list);
|
||||
|
||||
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
|
||||
|
||||
NV_STATUS uvm_service_block_context_init(void)
|
||||
{
|
||||
unsigned num_preallocated_contexts = 4;
|
||||
|
||||
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
// Pre-allocate some fault service contexts for the CPU and add them to the global list
|
||||
while (num_preallocated_contexts-- > 0) {
|
||||
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_service_block_context_exit(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context, *service_context_tmp;
|
||||
|
||||
// Free the fault service contexts for the CPU and clear the global list
|
||||
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
|
||||
cpu_fault.service_context_list) {
|
||||
uvm_kvfree(service_context);
|
||||
}
|
||||
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
// Get a fault service context from the global list or allocate a new one if there are no
|
||||
// available entries
|
||||
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context;
|
||||
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
|
||||
cpu_fault.service_context_list);
|
||||
|
||||
if (service_context)
|
||||
list_del(&service_context->cpu_fault.service_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
if (!service_context)
|
||||
service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
|
||||
return service_context;
|
||||
}
|
||||
|
||||
// Put a fault service context in the global list
|
||||
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
}
|
||||
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
@@ -489,139 +422,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
|
||||
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_block_t *va_block;
|
||||
NvU64 fault_addr = nv_page_fault_va(vmf);
|
||||
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
bool tools_enabled;
|
||||
bool major_fault = false;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_global_processor_mask_t gpus_to_check_for_ecc;
|
||||
|
||||
if (status != NV_OK)
|
||||
goto convert_error;
|
||||
|
||||
// TODO: Bug 2583279: Lock tracking is disabled for the power management
// lock in order to suppress reporting of a lock policy violation.
// The violation consists of acquiring the power management lock multiple
// times, and it manifests as an error during release. The re-acquisition
// of the power management locks happens upon re-entry into the UVM module,
// and is benign by itself, but when combined with certain power management
// scenarios it is indicative of a potential deadlock.
// Tracking will be re-enabled once the power management locking strategy is
// modified to avoid deadlocks.
|
||||
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
goto convert_error;
|
||||
}
|
||||
|
||||
service_context = uvm_service_block_context_cpu_alloc();
|
||||
if (!service_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
service_context->cpu_fault.wakeup_time_stamp = 0;
|
||||
|
||||
// The mmap_lock might be held in write mode, but the mode doesn't matter for
// the purpose of lock ordering, and we don't rely on it being held in write
// mode anywhere, so just record it as read mode in all cases.
|
||||
uvm_record_lock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
do {
|
||||
bool do_sleep = false;
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
NvU64 now = NV_GETTIME();
|
||||
if (now < service_context->cpu_fault.wakeup_time_stamp)
|
||||
do_sleep = true;
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
// Drop the VA space lock while we sleep
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// usleep_range is preferred because msleep has a 20ms granularity
|
||||
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
|
||||
// timers and, by adding a range, the Linux scheduler may coalesce
|
||||
// our wakeup with others, thus saving some interrupts.
|
||||
if (do_sleep) {
|
||||
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
|
||||
|
||||
usleep_range(nap_us, nap_us + nap_us / 2);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
|
||||
break;
|
||||
}
|
||||
|
||||
// Watch out, current->mm might not be vma->vm_mm
|
||||
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
|
||||
|
||||
// Loop until thrashing goes away.
|
||||
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
|
||||
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UvmEventFatalReason reason;
|
||||
|
||||
reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
|
||||
}
|
||||
|
||||
tools_enabled = va_space->tools.enabled;
|
||||
|
||||
if (status == NV_OK) {
|
||||
uvm_va_space_global_gpus_in_mask(va_space,
|
||||
&gpus_to_check_for_ecc,
|
||||
&service_context->cpu_fault.gpus_to_check_for_ecc);
|
||||
uvm_global_mask_retain(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
|
||||
uvm_global_mask_release(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
if (tools_enabled)
|
||||
uvm_tools_flush_events();
|
||||
|
||||
// Major faults involve I/O in order to resolve the fault.
|
||||
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
|
||||
// A process can also get statistics for major and minor faults by calling readproc().
|
||||
major_fault = service_context->cpu_fault.did_migrate;
|
||||
uvm_service_block_context_cpu_free(service_context);
|
||||
|
||||
unlock:
|
||||
// TODO: Bug 2583279: See the comment above the matching lock acquisition
|
||||
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
|
||||
|
||||
convert_error:
|
||||
switch (status) {
|
||||
case NV_OK:
|
||||
case NV_ERR_BUSY_RETRY:
|
||||
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
|
||||
case NV_ERR_NO_MEMORY:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
|
||||
}
|
||||
|
||||
|
||||
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
|
||||
@@ -814,7 +618,7 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
// Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
|
||||
// so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
|
||||
// with VM_IO, but that causes other mapping issues.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND);
|
||||
|
||||
vma->vm_ops = &uvm_vm_ops_managed;
|
||||
|
||||
@@ -986,8 +790,6 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
|
||||
{
|
||||
long ret;
|
||||
int write = 1;
|
||||
int force = 0;
|
||||
struct page *page;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@@ -998,7 +800,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
// are not used because unload_state_buf may be a managed memory pointer and
|
||||
// therefore a locking assertion from the CPU fault handler could be fired.
|
||||
nv_mmap_read_lock(current->mm);
|
||||
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
|
||||
nv_mmap_read_unlock(current->mm);
|
||||
|
||||
if (ret < 0)
|
||||
@@ -1008,7 +810,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
put_page(page);
|
||||
NV_UNPIN_USER_PAGE(page);
|
||||
status = NV_ERR_IN_USE;
|
||||
goto error;
|
||||
}
|
||||
@@ -1027,7 +829,7 @@ static void uvm_test_unload_state_exit(void)
|
||||
{
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
kunmap(g_uvm_global.unload_state.page);
|
||||
put_page(g_uvm_global.unload_state.page);
|
||||
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1094,7 +896,6 @@ static int uvm_init(void)
|
||||
if (uvm_enable_builtin_tests)
|
||||
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
|
||||
|
||||
|
||||
// After Open RM is released, both the enclosing "#if" and this comment
|
||||
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
|
||||
// check is both necessary and sufficient for reporting functionality.
|
||||
@@ -1104,7 +905,6 @@ static int uvm_init(void)
|
||||
if (uvm_hmm_is_enabled_system_wide())
|
||||
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
@@ -1146,4 +946,4 @@ module_exit(uvm_exit_entry);
|
||||
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
MODULE_INFO(supported, "external");
|
||||
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -216,12 +216,6 @@ NV_STATUS UvmDeinitialize(void);
|
||||
// Note that it is not required to release VA ranges that were reserved with
|
||||
// UvmReserveVa().
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// replaces it with a new open file with the same name.
|
||||
//
|
||||
@@ -416,14 +410,6 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
// location will have their range group association changed to
|
||||
// UVM_RANGE_GROUP_ID_NONE.
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unregister.
|
||||
@@ -1276,14 +1262,6 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
//
|
||||
// The VA range can be unmapped and freed via a call to UvmFree.
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Arguments:
|
||||
// base: (INPUT)
|
||||
// Base address of the virtual address range.
|
||||
@@ -1320,10 +1298,6 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
|
||||
// or caching is requested on more than one GPU.
|
||||
|
||||
|
||||
|
||||
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The current process is not the one which called UvmInitialize, and
|
||||
|
||||
95 kernel-open/nvidia-uvm/uvm_ada.c Normal file
@@ -0,0 +1,95 @@
/*******************************************************************************
    Copyright (c) 2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_ada_fault_buffer.h"

void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
    parent_gpu->tlb_batch.va_invalidate_supported = true;

    parent_gpu->tlb_batch.va_range_invalidate_supported = true;

    // TODO: Bug 1767241: Run benchmarks to figure out a good number
    parent_gpu->tlb_batch.max_ranges = 8;

    parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
                                                                                 8)));
    }

    // A single top level PDE on Ada covers 128 TB and that's the minimum size
    // that can be used.
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;

    parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

    // Not all units on Ada support 49-bit addressing, including those which
    // access channel buffers.
    parent_gpu->max_channel_va = 1ULL << 40;

    parent_gpu->max_host_va = 1ULL << 40;

    // Ada can map sysmem with any page size
    parent_gpu->can_map_sysmem_with_large_pages = true;

    // Prefetch instructions will generate faults
    parent_gpu->prefetch_fault_supported = true;

    // Ada can place GPFIFO in vidmem
    parent_gpu->gpfifo_in_vidmem_supported = true;

    parent_gpu->replayable_faults_supported = true;

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

    parent_gpu->has_clear_faulted_channel_sw_method = true;

    parent_gpu->has_clear_faulted_channel_method = false;

    parent_gpu->smc.supported = true;

    parent_gpu->sparse_mappings_supported = true;

    parent_gpu->map_remap_larger_page_promotion = false;

    parent_gpu->plc_supported = true;
}
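The assertion above guards the width of the fault packet's uTLB id field: every uTLB on the GPU must have an id that fits in fault_source.utlb_id. A minimal standalone sketch of the same check, assuming a one-byte id field (the real width comes from uvm_fault_buffer_entry_t):

#include <assert.h>
#include <stdint.h>

typedef uint8_t utlb_id_field_t;    // assumed width of fault_source.utlb_id

// Hypothetical helper, not part of the driver: total uTLB count for a GPU,
// checked against the id field width the same way
// uvm_hal_ada_arch_init_properties() does above.
static unsigned int total_replayable_utlb_count(unsigned int max_gpc_count,
                                                unsigned int utlbs_per_gpc)
{
    unsigned int count = max_gpc_count * utlbs_per_gpc;

    assert(count <= (1u << (sizeof(utlb_id_field_t) * 8)));
    return count;
}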
kernel-open/nvidia-uvm/uvm_ada_fault_buffer.h (new file, 84 lines)
@@ -0,0 +1,84 @@
/*******************************************************************************
    Copyright (c) 2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_HAL_ADA_FAULT_BUFFER_H__
#define __UVM_HAL_ADA_FAULT_BUFFER_H__

#include "nvtypes.h"
#include "uvm_common.h"
#include "uvm_gpu.h"

// There are up to 6 TPCs per GPC in Ada, and there are 2 LTP uTLB per TPC.
// Besides, there is one RGG uTLB per GPC. Each TPC has a number of clients
// that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests from
// these units are routed as follows to the 2 LTP uTLBs:
//
//        --------                      ---------
//        | T1_0 | -------------------> | uTLB0 |
//        --------                      ---------
//
//        --------                      ---------
//        | T1_1 | -------------------> | uTLB1 |
//        --------          ----------> ---------
//                          |               ^
//        -------           |               |
//        | PE  | -----------               |
//        -------                           |
//                                          |
//        ---------                         |
//        | TPCCS | -------------------------
//        ---------
//
//
// The client ids are local to their GPC and the id mapping is linear across
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
//
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
// be ignored. These will never be reported in a fault message, and should
// never be used in an invalidate. Therefore, we define our own values.
typedef enum {
    UVM_ADA_GPC_UTLB_ID_RGG = 0,
    UVM_ADA_GPC_UTLB_ID_LTP0 = 1,
    UVM_ADA_GPC_UTLB_ID_LTP1 = 2,
    UVM_ADA_GPC_UTLB_ID_LTP2 = 3,
    UVM_ADA_GPC_UTLB_ID_LTP3 = 4,
    UVM_ADA_GPC_UTLB_ID_LTP4 = 5,
    UVM_ADA_GPC_UTLB_ID_LTP5 = 6,
    UVM_ADA_GPC_UTLB_ID_LTP6 = 7,
    UVM_ADA_GPC_UTLB_ID_LTP7 = 8,
    UVM_ADA_GPC_UTLB_ID_LTP8 = 9,
    UVM_ADA_GPC_UTLB_ID_LTP9 = 10,
    UVM_ADA_GPC_UTLB_ID_LTP10 = 11,
    UVM_ADA_GPC_UTLB_ID_LTP11 = 12,

    UVM_ADA_GPC_UTLB_COUNT,
} uvm_ada_gpc_utlb_id_t;

static NvU32 uvm_ada_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
    UVM_ASSERT(utlbs <= UVM_ADA_GPC_UTLB_COUNT);
    return utlbs;
}

#endif
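The id mapping in the comment above can be made concrete with a small sketch. The helper below is illustrative only (it does not exist in the driver) and assumes the routing implied by the diagram: within TPC n, the first T1 client feeds LTP uTLB 2*n and the second T1 client (together with PE_n and TPCCS_n) feeds LTP uTLB 2*n+1, with id 0 reserved for the per-GPC RGG uTLB.

// Illustrative, hypothetical helper based on the comment above: map a T1
// client within a TPC to the uvm_ada_gpc_utlb_id_t it requests through.
static uvm_ada_gpc_utlb_id_t example_t1_client_to_utlb_id(NvU32 tpc_index, NvU32 t1_within_tpc)
{
    NvU32 ltp_index = tpc_index * 2 + t1_within_tpc;   // t1_within_tpc is 0 or 1

    // LTP ids start at 1 because id 0 is the per-GPC RGG uTLB.
    return (uvm_ada_gpc_utlb_id_t)(UVM_ADA_GPC_UTLB_ID_LTP0 + ltp_index);
}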
@@ -1,5 +1,5 @@
/*******************************************************************************
-    Copyright (c) 2018-2021 NVIDIA Corporation
+    Copyright (c) 2018-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -53,7 +53,7 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    // See uvm_mmu.h for mapping placement
-    parent_gpu->flat_vidmem_va_base = 132ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
    parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

@@ -1,5 +1,5 @@
/*******************************************************************************
-    Copyright (c) 2018-2021 NVIDIA Corporation
+    Copyright (c) 2018-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -107,6 +107,8 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
    uvm_spin_loop_t spin;
    NvU32 channel_faulted_mask = 0;
    NvU32 clear_type_value = 0;
    NvU32 doorbell_value = 0;
    volatile NvU32 *doorbell_ptr;

    UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);

@@ -123,6 +125,12 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
                   uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
    }

    doorbell_ptr = (NvU32 *)((NvU8 *)user_channel->runlist_pri_base_register + NV_RUNLIST_INTERNAL_DOORBELL);

    // GFID is not required since we clear faulted channel with a SW method on
    // SRIOV. On baremetal, GFID is always zero.
    doorbell_value = HWVALUE(_RUNLIST, INTERNAL_DOORBELL, CHID, user_channel->hw_channel_id);

    // Wait for the channel to have the FAULTED bit set as this can race with
    // interrupt notification
    UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
@@ -131,7 +139,7 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user

    wmb();

-    UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
+    UVM_GPU_WRITE_ONCE(*doorbell_ptr, doorbell_value);
}

static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)

@@ -58,12 +58,6 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
        return uvm_ats_ibm_add_gpu(parent_gpu);
    }

    return NV_OK;
}

@@ -77,12 +71,6 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)

        uvm_ats_ibm_remove_gpu(parent_gpu);
    }
}

NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -100,10 +88,6 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_bind_gpu(gpu_va_space);

    return status;
}

@@ -116,10 +100,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unbind_gpu(gpu_va_space);
}

NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -147,10 +127,6 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);

    if (status == NV_OK)
        uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);

@@ -173,10 +149,6 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);

    uvm_va_space_down_write(va_space);
    uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
    uvm_va_space_up_write(va_space);

@@ -29,14 +29,8 @@
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())

typedef struct
{
    // Mask of gpu_va_spaces which are registered for ATS access. The mask is
@@ -47,11 +41,6 @@ typedef struct
{
        uvm_ibm_va_space_t ibm;
    };
} uvm_ats_va_space_t;

@@ -69,11 +58,6 @@ typedef struct
{
        uvm_ibm_gpu_va_space_t ibm;
    };
} uvm_ats_gpu_va_space_t;

@@ -106,10 +90,6 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
// LOCKING: mmap_lock must be lockable.
//          VA space lock must be lockable.
//          gpu_va_space->gpu must be retained.
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);

// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
@@ -25,9 +25,62 @@
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"

// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
// these experimental parameters. These are intended to help guide that policy.
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
                 "Max order of pages (2^N) to prefetch on replayable ATS faults");

static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
                 "Max order of pages (2^N) to prefetch on non-replayable ATS faults");

// Expand the fault region to the naturally-aligned region with order given by
// the module parameters, clamped to the vma containing fault_addr (if any).
// Note that this means the region contains fault_addr but may not begin at
// fault_addr.
static void expand_fault_region(struct mm_struct *mm,
                                NvU64 fault_addr,
                                uvm_fault_client_type_t client_type,
                                unsigned long *start,
                                unsigned long *size)
{
    struct vm_area_struct *vma;
    unsigned int order;
    unsigned long outer, aligned_start, aligned_size;

    *start = fault_addr;
    *size = PAGE_SIZE;

    if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
        order = uvm_exp_perf_prefetch_ats_order_non_replayable;
    else
        order = uvm_exp_perf_prefetch_ats_order_replayable;

    if (order == 0)
        return;

    vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
    if (!vma)
        return;

    UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);

    aligned_size = (1UL << order) * PAGE_SIZE;

    aligned_start = fault_addr & ~(aligned_size - 1);

    *start = max(vma->vm_start, aligned_start);
    outer = min(vma->vm_end, aligned_start + aligned_size);
    *size = outer - *start;
}

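A worked illustration of the clamping above (all values are assumptions chosen for the example): with order = 4 and 4 KiB pages the prefetch granule is 64 KiB; a fault at 0x7f0003000 inside a VMA covering [0x7f0001000, 0x7f0006000) aligns down to 0x7f0000000 and is then clamped to the VMA, yielding start = 0x7f0001000 and size = 0x5000. The same arithmetic as a standalone program:

// Standalone illustration of expand_fault_region()'s math (hypothetical
// values; no kernel dependencies).
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE 0x1000ull

int main(void)
{
    unsigned int order = 4;                                 // 2^4 pages = 64 KiB granule
    uint64_t vm_start = 0x7f0001000ull, vm_end = 0x7f0006000ull;
    uint64_t fault_addr = 0x7f0003000ull;

    uint64_t aligned_size  = (1ull << order) * EX_PAGE_SIZE;
    uint64_t aligned_start = fault_addr & ~(aligned_size - 1);
    uint64_t start = aligned_start > vm_start ? aligned_start : vm_start;
    uint64_t outer = vm_end < aligned_start + aligned_size ? vm_end : aligned_start + aligned_size;

    // Prints: start=0x7f0001000 size=0x5000
    printf("start=0x%llx size=0x%llx\n",
           (unsigned long long)start,
           (unsigned long long)(outer - start));
    return 0;
}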
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
                                       NvU64 fault_addr,
-                                       uvm_fault_access_type_t access_type)
+                                       uvm_fault_access_type_t access_type,
+                                       uvm_fault_client_type_t client_type)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    struct mm_struct *mm = va_space->va_space_mm.mm;
@@ -66,8 +119,6 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
    {
        .va_space = va_space,
        .mm = mm,
-        .start = fault_addr,
-        .length = PAGE_SIZE,
        .dst_id = gpu_va_space->gpu->parent->id,
        .dst_node_id = -1,
        .populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
@@ -79,6 +130,8 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,

    UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));

+    expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
+
    // TODO: Bug 2103669: Service more than a single fault at a time
    //
    // We are trying to use migrate_vma API in the kernel (if it exists) to
@@ -131,7 +184,10 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
    }
    else {
        // TODO: Bug 2103669: Service more than a single fault at a time
-        status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
+        status = uvm_ats_service_fault(gpu_va_space,
+                                       current_entry->fault_address,
+                                       service_access_type,
+                                       current_entry->fault_source.client_type);
    }

    // Do not flag prefetch faults as fatal unless something fatal happened
@@ -155,7 +211,8 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
        uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
        status = uvm_ats_service_fault(gpu_va_space,
                                       current_entry->fault_address,
-                                       UVM_FAULT_ACCESS_TYPE_READ);
+                                       UVM_FAULT_ACCESS_TYPE_READ,
+                                       current_entry->fault_source.client_type);

        // If read accesses are also invalid, cancel the fault. If a
        // different error code is returned, exit

@@ -1,5 +1,5 @@
/*******************************************************************************
-    Copyright (c) 2015-2021 NVIDIA Corporation
+    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -52,13 +52,13 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
    uvm_push_t push;
    bool is_proxy;

-    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, &host_mem);
+    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
    memset(host_ptr, 0, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
-        status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, &mem[i]);
+        status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

@@ -167,7 +167,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
    uvm_push_t push;
    NvU32 value;

-    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), &host_mem);
+    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
    *host_ptr = 0;
@@ -429,13 +429,13 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

    // Virtual address (in UVM's internal address space) backed by vidmem
-    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, &gpu_rm_mem), done);
+    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
    is_proxy_va_space = false;
    gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
    gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);

    // Virtual address (in UVM's internal address space) backed by sysmem
-    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, &sys_rm_mem), done);
+    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
    gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
    gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);

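The repeated change in the hunks above is mechanical: uvm_rm_mem_alloc() and its _and_map variants gained an extra argument, passed as 0 at every existing call site. Judging from the later test code in this diff (which also passes 0), the new argument appears to select a GPU-VA alignment, with 0 meaning the default; that interpretation is an assumption, not something this diff states. A hedged sketch of porting a caller:

// Sketch of updating a caller to the extended signature. The parameter name
// gpu_alignment is an assumption; 0 requests the default behaviour, matching
// every call site updated in this diff.
static NV_STATUS example_alloc_scratch(uvm_gpu_t *gpu, size_t size, uvm_rm_mem_t **mem_out)
{
    // Before: uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, mem_out);
    return uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0 /* gpu_alignment (assumed) */, mem_out);
}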
@@ -24,6 +24,7 @@
#include "uvm_channel.h"

#include "uvm_api.h"
#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_procfs.h"
@@ -68,6 +69,38 @@ typedef enum
    UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
} uvm_channel_update_mode_t;

static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
    else
        uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
}

void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_lock(&pool->mutex);
    else
        uvm_spin_lock(&pool->spinlock);
}

void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_unlock(&pool->mutex);
    else
        uvm_spin_unlock(&pool->spinlock);
}

void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_assert_mutex_locked(&pool->mutex);
    else
        uvm_assert_spinlock_locked(&pool->spinlock);
}

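These wrappers hide whether a pool serializes with a mutex (proxy pools, presumably so the RM paging-channel submission no longer has to run under a spinlock) or a spinlock (CE pools). A minimal usage sketch; the caller below is hypothetical and not part of the driver:

// Hypothetical caller: read ring-buffer state under the pool lock without
// caring which lock type backs it.
static NvU32 example_read_cpu_put(uvm_channel_t *channel)
{
    NvU32 cpu_put;

    uvm_channel_pool_lock(channel->pool);
    cpu_put = channel->cpu_put;
    uvm_channel_pool_unlock(channel->pool);

    return cpu_put;
}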
// Update channel progress, completing up to max_to_complete entries
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
                                                  NvU32 max_to_complete,
@@ -80,7 +113,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,

    NvU64 completed_value = uvm_channel_update_completed_value(channel);

-    uvm_spin_lock(&channel->pool->lock);
+    uvm_channel_pool_lock(channel->pool);

    // Completed value should never exceed the queued value
    UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
@@ -97,15 +130,18 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
        if (mode == UVM_CHANNEL_UPDATE_MODE_COMPLETED && entry->tracking_semaphore_value > completed_value)
            break;

-        uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
-        list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
+        if (entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
+            uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
+            list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
+        }

        gpu_get = (gpu_get + 1) % channel->num_gpfifo_entries;
        ++completed_count;
    }

    channel->gpu_get = gpu_get;

-    uvm_spin_unlock(&channel->pool->lock);
+    uvm_channel_pool_unlock(channel->pool);

    if (cpu_put >= gpu_get)
        pending_gpfifos = cpu_put - gpu_get;
@@ -150,55 +186,49 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
    return pending_gpfifos;
}

-static bool channel_is_available(uvm_channel_t *channel)
+static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
-    NvU32 next_put;
+    NvU32 pending_entries;

-    uvm_assert_spinlock_locked(&channel->pool->lock);
+    uvm_channel_pool_assert_locked(channel->pool);

-    next_put = (channel->cpu_put + channel->current_pushes_count + 1) % channel->num_gpfifo_entries;
+    if (channel->cpu_put >= channel->gpu_get)
+        pending_entries = channel->cpu_put - channel->gpu_get;
+    else
+        pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;

-    return (next_put != channel->gpu_get);
+    return (pending_entries + channel->current_gpfifo_count + num_gpfifo_entries < channel->num_gpfifo_entries);
}

-static bool try_claim_channel(uvm_channel_t *channel)
+static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
    bool claimed = false;

-    uvm_spin_lock(&channel->pool->lock);
+    UVM_ASSERT(num_gpfifo_entries > 0);
+    UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);

-    if (channel_is_available(channel)) {
-        ++channel->current_pushes_count;
+    uvm_channel_pool_lock(channel->pool);
+
+    if (channel_is_available(channel, num_gpfifo_entries)) {
+        channel->current_gpfifo_count += num_gpfifo_entries;
        claimed = true;
    }

-    uvm_spin_unlock(&channel->pool->lock);
+    uvm_channel_pool_unlock(channel->pool);

    return claimed;
}

static void lock_push(uvm_channel_t *channel)
{
}

static void unlock_push(uvm_channel_t *channel)
{
}

static bool trylock_push(uvm_channel_t *channel)
{
    return true;
}

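A worked illustration of the claim logic above (numbers are assumptions): with num_gpfifo_entries = 32, cpu_put = 3 and gpu_get = 5 the put pointer has wrapped, so pending_entries = 3 + 32 - 5 = 30; a request for 2 entries then fails even with current_gpfifo_count = 0, because 30 + 0 + 2 is not below 32 (one slot is always left free so a full ring can be told apart from an empty one, matching the "spare entry" note in the test code later in this diff). The wraparound computation in isolation:

// Standalone sketch of the pending-entry computation used by
// channel_is_available(): entries between gpu_get and cpu_put are still owned
// by the GPU, with wraparound over the ring of num_entries slots.
static NvU32 example_pending_gpfifo_entries(NvU32 cpu_put, NvU32 gpu_get, NvU32 num_entries)
{
    if (cpu_put >= gpu_get)
        return cpu_put - gpu_get;

    return cpu_put + num_entries - gpu_get;
}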
@@ -212,13 +242,8 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
// TODO: Bug 1764953: Prefer idle/less busy channels
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if (trylock_push(channel)) {
|
||||
if (try_claim_channel(channel)) {
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
return NV_OK;
|
||||
}
|
||||
@@ -235,7 +260,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
if (try_claim_channel(channel)) {
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
lock_push(channel);
|
||||
*channel_out = channel;
|
||||
|
||||
@@ -256,7 +281,8 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
|
||||
{
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
|
||||
return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
|
||||
}
|
||||
|
||||
@@ -297,14 +323,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_info_t *push_info;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
|
||||
UVM_ASSERT(push_info != NULL);
|
||||
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
|
||||
list_del(&push_info->available_list_node);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return push_info - channel->push_infos;
|
||||
}
|
||||
@@ -319,13 +345,6 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
|
||||
manager = channel->pool->manager;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
status = uvm_pushbuffer_begin_push(manager->pushbuffer, push);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@@ -370,10 +389,6 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_proxy(channel));
|
||||
|
||||
// nvUvmInterfacePagingChannelPushStream should not sleep, because a
|
||||
// spinlock is currently held.
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
|
||||
status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);
|
||||
|
||||
if (status != NV_OK) {
|
||||
@@ -407,10 +422,6 @@ static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va,
|
||||
|
||||
if (uvm_channel_is_ce(push->channel))
|
||||
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
|
||||
|
||||
|
||||
|
||||
|
||||
else
|
||||
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
|
||||
}
|
||||
@@ -428,7 +439,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
new_tracking_value = ++channel->tracking_sem.queued_value;
|
||||
new_payload = (NvU32)new_tracking_value;
|
||||
@@ -447,9 +458,10 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
entry->pushbuffer_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
|
||||
entry->pushbuffer_size = push_size;
|
||||
entry->push_info = &channel->push_infos[push->push_info_index];
|
||||
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
|
||||
|
||||
UVM_ASSERT(channel->current_pushes_count > 0);
|
||||
--channel->current_pushes_count;
|
||||
UVM_ASSERT(channel->current_gpfifo_count > 0);
|
||||
--channel->current_gpfifo_count;
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
proxy_channel_submit_work(push, push_size);
|
||||
@@ -464,7 +476,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
|
||||
// to be called first.
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
unlock_push(channel);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
@@ -477,18 +489,116 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
push->channel_tracking_value = new_tracking_value;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel)
|
||||
// The caller must submit a normal GPFIFO entry with a semaphore release
|
||||
// following the control GPFIFO, refer to uvm_channel_write_ctrl_gpfifo() for an
|
||||
// example.
|
||||
static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
|
||||
{
|
||||
uvm_gpfifo_entry_t *entry;
|
||||
NvU64 *gpfifo_entry;
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
|
||||
|
||||
entry = &channel->gpfifo_entries[cpu_put];
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
entry->type = UVM_GPFIFO_ENTRY_TYPE_CONTROL;
|
||||
|
||||
// Control GPFIFO entries are followed by a semaphore_release push in UVM.
|
||||
// We add the tracking semaphore value of the next GPFIFO entry,
|
||||
// potentially the associated semaphore release push. Even if a different
|
||||
// GPFIFO entry sneaks in, the purposes of signaling that this control
|
||||
// GPFIFO entry has been processed is accomplished.
|
||||
entry->tracking_semaphore_value = channel->tracking_sem.queued_value + 1;
|
||||
|
||||
UVM_ASSERT(channel->current_gpfifo_count > 1);
|
||||
--channel->current_gpfifo_count;
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
|
||||
*gpfifo_entry = ctrl_fifo_entry_value;
|
||||
|
||||
// Need to make sure all the GPFIFO entries writes complete before updating
|
||||
// GPPUT. We also don't want any reads to be moved after the GPPut write as
|
||||
// the GPU might modify the data they read as soon as the GPPut write
|
||||
// happens.
|
||||
mb();
|
||||
|
||||
gpu->parent->host_hal->write_gpu_put(channel, new_cpu_put);
|
||||
|
||||
channel->cpu_put = new_cpu_put;
|
||||
|
||||
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that.
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
unlock_push(channel);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
// performance problems, e.g. bugs 626179, 593841. This may be related to
|
||||
// bug 124888, which GL works around by doing a clflush"
|
||||
wmb();
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_push_t push;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
// We reserve two GPFIFO entries, i.e., the control GPFIFO entry and the
|
||||
// subsequent semaphore_release push. There is, however, a potential case
|
||||
// for GPFIFO control submission starvation. This may happen because a
|
||||
// GPFIFO control submission requires two available GPFIFO entries. If you
|
||||
// have a full GPFIFO ring buffer that frees one entry at a time, while
|
||||
// there is another thread consuming this recently released entry at the
|
||||
// same rate, a concurrent thread trying to reserve two entries for a GPFIFO
|
||||
// control submission may starve. We could handle this by imposing minimal
|
||||
// release entries in uvm_channel.c:uvm_channel_update_progress(). Instead,
|
||||
// we don't deal with this potential starvation case because:
|
||||
// - Control GPFIFO are rarely used.
|
||||
// - By default, we release up to 8 GPFIFO entries at a time, except if the
|
||||
// release rate is constrained by lengthy pushbuffers -- another rare
|
||||
// situation.
|
||||
// - It would add unnecessary complexity to channel_update_progress().
|
||||
status = uvm_channel_reserve(channel, 2);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
write_ctrl_gpfifo(channel, ctrl_fifo_entry_value);
|
||||
|
||||
status = uvm_push_begin_on_reserved_channel(channel, &push, "write_ctrl_GPFIFO");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
// This is an empty push, the push's embedded semaphore_release signals that
|
||||
// the GPFIFO control entry has been processed.
|
||||
uvm_push_end(&push);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
if (try_claim_channel(channel))
|
||||
if (try_claim_channel(channel, num_gpfifo_entries))
|
||||
goto out;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
uvm_spin_loop_init(&spin);
|
||||
while (!try_claim_channel(channel) && status == NV_OK) {
|
||||
while (!try_claim_channel(channel, num_gpfifo_entries) && status == NV_OK) {
|
||||
UVM_SPIN_LOOP(&spin);
|
||||
status = uvm_channel_check_errors(channel);
|
||||
uvm_channel_update_progress(channel);
|
||||
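For reference, this is how a caller is expected to use the control-entry path added above. The helper below is a minimal usage sketch modeled on the test code later in this diff (test_write_ctrl_gpfifo_noop); it is not itself part of the driver:

// Hypothetical caller: build a no-op control entry with the host HAL and
// submit it. Proxy channels do not support control entries, so they are
// skipped here.
static NV_STATUS example_submit_noop_ctrl_entry(uvm_channel_t *channel)
{
    NvU64 entry;
    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);

    if (uvm_channel_is_proxy(channel))
        return NV_ERR_NOT_SUPPORTED;

    gpu->parent->host_hal->set_gpfifo_noop(&entry);

    // Internally reserves two GPFIFO entries: the control entry itself plus
    // the empty push whose semaphore release signals its completion.
    return uvm_channel_write_ctrl_gpfifo(channel, entry);
}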
@@ -511,12 +621,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
if (pending_count == 0)
|
||||
return NULL;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->gpu_get != channel->cpu_put)
|
||||
entry = &channel->gpfifo_entries[channel->gpu_get];
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return entry;
|
||||
}
|
||||
@@ -568,12 +678,28 @@ NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel)
|
||||
|
||||
fatal_entry = uvm_channel_get_fatal_entry(channel);
|
||||
if (fatal_entry != NULL) {
|
||||
uvm_push_info_t *push_info = fatal_entry->push_info;
|
||||
UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n",
|
||||
push_info->description, push_info->filename, push_info->line, push_info->function);
|
||||
if (fatal_entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
|
||||
uvm_push_info_t *push_info = fatal_entry->push_info;
|
||||
UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n",
|
||||
push_info->description,
|
||||
push_info->filename,
|
||||
push_info->line,
|
||||
push_info->function);
|
||||
}
|
||||
else {
|
||||
NvU64 *gpfifo_entry;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + (fatal_entry - channel->gpfifo_entries);
|
||||
UVM_ERR_PRINT("Channel error likely caused by GPFIFO control entry, data: 0x%llx, gpu_get: %d\n",
|
||||
*gpfifo_entry,
|
||||
channel->gpu_get);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_global_set_fatal_error(status);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -608,40 +734,6 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
|
||||
return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(pool->num_channels > 0);
|
||||
@@ -658,19 +750,15 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
|
||||
}
|
||||
|
||||
uvm_procfs_destroy_entry(channel->procfs.pushes);
|
||||
uvm_procfs_destroy_entry(channel->procfs.info);
|
||||
uvm_procfs_destroy_entry(channel->procfs.dir);
|
||||
proc_remove(channel->procfs.pushes);
|
||||
proc_remove(channel->procfs.info);
|
||||
proc_remove(channel->procfs.dir);
|
||||
|
||||
uvm_kvfree(channel->push_acquire_infos);
|
||||
uvm_kvfree(channel->push_infos);
|
||||
|
||||
uvm_kvfree(channel->gpfifo_entries);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle));
|
||||
else
|
||||
@@ -694,11 +782,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
|
||||
@@ -710,10 +793,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
if (uvm_channel_is_ce(channel))
|
||||
channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space,
|
||||
&channel_alloc_params,
|
||||
&channel->handle,
|
||||
@@ -732,11 +811,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
channel_info->hwChannelId,
|
||||
channel_info->hwRunlistId,
|
||||
channel_info->hwChannelId,
|
||||
|
||||
|
||||
|
||||
"CE",
|
||||
|
||||
engine_index);
|
||||
|
||||
return NV_OK;
|
||||
@@ -803,12 +878,6 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
channel->num_gpfifo_entries = manager->conf.num_gpfifo_entries;
|
||||
channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries);
|
||||
if (channel->gpfifo_entries == NULL) {
|
||||
@@ -858,8 +927,25 @@ static NV_STATUS init_channel(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NV_STATUS status = uvm_push_begin_on_channel(channel, &push, "Init channel");
|
||||
NV_STATUS status;
|
||||
NvU32 num_entries = 1;
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu))
|
||||
num_entries++;
|
||||
|
||||
status = uvm_channel_reserve(channel, num_entries);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
|
||||
NvU64 gpfifo_entry;
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
gpu->parent->host_hal->set_gpfifo_pushbuffer_segment_base(&gpfifo_entry,
|
||||
uvm_pushbuffer_get_gpu_va_base(pushbuffer));
|
||||
write_ctrl_gpfifo(channel, gpfifo_entry);
|
||||
}
|
||||
|
||||
status = uvm_push_begin_on_reserved_channel(channel, &push, "Init channel");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
@@ -868,10 +954,6 @@ static NV_STATUS init_channel(uvm_channel_t *channel)
|
||||
if (uvm_channel_is_ce(channel))
|
||||
gpu->parent->ce_hal->init(&push);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
gpu->parent->host_hal->init(&push);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
@@ -925,7 +1007,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
pool->engine_index = engine_index;
|
||||
pool->pool_type = pool_type;
|
||||
|
||||
uvm_spin_lock_init(&pool->lock, UVM_LOCK_ORDER_CHANNEL);
|
||||
channel_pool_lock_init(pool);
|
||||
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
|
||||
@@ -1348,12 +1430,6 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools);
|
||||
if (!manager->channel_pools)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
@@ -1384,17 +1460,6 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
manager->pool_to_use.default_for_type[channel_type] = pool;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -1447,11 +1512,11 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
|
||||
if (channel_manager == NULL)
|
||||
return;
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.pending_pushes);
|
||||
proc_remove(channel_manager->procfs.pending_pushes);
|
||||
|
||||
channel_manager_destroy_pools(channel_manager);
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.channels_dir);
|
||||
proc_remove(channel_manager->procfs.channels_dir);
|
||||
|
||||
uvm_pushbuffer_destroy(channel_manager->pushbuffer);
|
||||
|
||||
@@ -1520,39 +1585,25 @@ uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 poo
|
||||
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type)
|
||||
{
|
||||
|
||||
|
||||
|
||||
BUILD_BUG_ON(UVM_CHANNEL_TYPE_COUNT != 5);
|
||||
|
||||
|
||||
switch (channel_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_CPU_TO_GPU);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_CPU);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_INTERNAL);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_MEMOPS);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_GPU);
|
||||
|
||||
|
||||
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type)
|
||||
{
|
||||
|
||||
|
||||
|
||||
BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 2);
|
||||
|
||||
|
||||
switch (channel_pool_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE_PROXY);
|
||||
|
||||
|
||||
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -1562,7 +1613,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
|
||||
@@ -1574,7 +1625,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
|
||||
@@ -1618,7 +1669,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
|
||||
@@ -1630,28 +1681,43 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
if (entry->tracking_semaphore_value + finished_pushes_count <= completed_value)
|
||||
continue;
|
||||
|
||||
// Obtain the value acquire tracking information from the push_info index
|
||||
if (uvm_push_info_is_tracking_acquires()) {
|
||||
NvU32 push_info_index = push_info - channel->push_infos;
|
||||
UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);
|
||||
if (entry->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
|
||||
NvU64 *gpfifo_entry;
|
||||
|
||||
push_acquire_info = &channel->push_acquire_infos[push_info_index];
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + gpu_get;
|
||||
UVM_SEQ_OR_DBG_PRINT(seq,
|
||||
" control GPFIFO entry - data: 0x%llx, gpu_get: %d\n",
|
||||
*gpfifo_entry,
|
||||
gpu_get);
|
||||
}
|
||||
else {
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(seq,
|
||||
" %s push '%s' started at %s:%d in %s() releasing value %llu%s",
|
||||
entry->tracking_semaphore_value <= completed_value ? "finished" : "pending",
|
||||
push_info->description,
|
||||
push_info->filename,
|
||||
push_info->line,
|
||||
push_info->function,
|
||||
entry->tracking_semaphore_value,
|
||||
!push_acquire_info || push_acquire_info->num_values == 0? "\n" : "");
|
||||
// Obtain the value acquire tracking information from the push_info
|
||||
// index
|
||||
if (uvm_push_info_is_tracking_acquires()) {
|
||||
NvU32 push_info_index = push_info - channel->push_infos;
|
||||
UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);
|
||||
|
||||
if (push_acquire_info)
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
push_acquire_info = &channel->push_acquire_infos[push_info_index];
|
||||
}
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(seq,
|
||||
" %s push '%s' started at %s:%d in %s() releasing value %llu%s",
|
||||
entry->tracking_semaphore_value <= completed_value ? "finished" : "pending",
|
||||
push_info->description,
|
||||
push_info->filename,
|
||||
push_info->line,
|
||||
push_info->function,
|
||||
entry->tracking_semaphore_value,
|
||||
!push_acquire_info || push_acquire_info->num_values == 0 ? "\n" : "");
|
||||
|
||||
if (push_acquire_info)
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)
|
||||
@@ -1694,7 +1760,7 @@ static int nv_procfs_read_manager_pending_pushes(struct seq_file *s, void *v)
|
||||
uvm_channel_manager_t *manager = (uvm_channel_manager_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
channel_manager_print_pending_pushes(manager, s);
|
||||
|
||||
@@ -1733,7 +1799,7 @@ static int nv_procfs_read_channel_info(struct seq_file *s, void *v)
|
||||
uvm_channel_t *channel = (uvm_channel_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
uvm_channel_print_info(channel, s);
|
||||
|
||||
@@ -1754,7 +1820,7 @@ static int nv_procfs_read_channel_pushes(struct seq_file *s, void *v)
|
||||
uvm_channel_t *channel = (uvm_channel_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
// Include up to 5 finished pushes for some context
|
||||
channel_print_pushes(channel, 5, s);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -83,18 +83,7 @@ typedef enum
|
||||
|
||||
// ^^^^^^
|
||||
// Channel types backed by a CE.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
|
||||
|
||||
} uvm_channel_type_t;
|
||||
|
||||
typedef enum
|
||||
@@ -112,34 +101,43 @@ typedef enum
|
||||
// There is a single proxy pool and channel per GPU.
|
||||
UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_CHANNEL_POOL_TYPE_COUNT = 2,
|
||||
|
||||
|
||||
// A mask used to select pools of any type.
|
||||
UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
|
||||
} uvm_channel_pool_type_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Push-based GPFIFO entry
|
||||
UVM_GPFIFO_ENTRY_TYPE_NORMAL,
|
||||
|
||||
// Control GPFIFO entry, i.e., the LENGTH field is zero, not associated with
|
||||
// a push.
|
||||
UVM_GPFIFO_ENTRY_TYPE_CONTROL
|
||||
} uvm_gpfifo_entry_type_t;
|
||||
|
||||
struct uvm_gpfifo_entry_struct
|
||||
{
|
||||
// Offset of the pushbuffer in the pushbuffer allocation used by this entry
|
||||
uvm_gpfifo_entry_type_t type;
|
||||
|
||||
// Channel tracking semaphore value that indicates completion of
|
||||
// this entry.
|
||||
NvU64 tracking_semaphore_value;
|
||||
|
||||
// The following fields are only valid when type is
|
||||
// UVM_GPFIFO_ENTRY_TYPE_NORMAL.
|
||||
|
||||
// Offset of the pushbuffer in the pushbuffer allocation used by
|
||||
// this entry.
|
||||
NvU32 pushbuffer_offset;
|
||||
|
||||
// Size of the pushbuffer used for this entry
|
||||
// Size of the pushbuffer used for this entry.
|
||||
NvU32 pushbuffer_size;
|
||||
|
||||
// List node used by the pushbuffer tracking
|
||||
struct list_head pending_list_node;
|
||||
|
||||
// Channel tracking semaphore value that indicates completion of this entry
|
||||
NvU64 tracking_semaphore_value;
|
||||
|
||||
// Push info for the pending push that used this GPFIFO entry
|
||||
uvm_push_info_t *push_info;
|
||||
};
|
||||
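A condensed illustration of how the two entry types above differ at completion time, mirroring the channel completion change earlier in this diff; the helper itself is hypothetical and not part of the driver:

// Hypothetical helper: only normal entries own pushbuffer space and a
// push_info slot, so control entries have nothing to release besides their
// GPFIFO slot.
static void example_retire_entry(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry)
{
    if (entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
        uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
        list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
    }
}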
@@ -165,7 +163,11 @@ typedef struct
|
||||
uvm_channel_pool_type_t pool_type;
|
||||
|
||||
// Lock protecting the state of channels in the pool
|
||||
uvm_spinlock_t lock;
|
||||
union {
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
} uvm_channel_pool_t;
|
||||
|
||||
struct uvm_channel_struct
|
||||
@@ -193,10 +195,10 @@ struct uvm_channel_struct
|
||||
// for completion.
|
||||
NvU32 gpu_get;
|
||||
|
||||
// Number of currently on-going pushes on this channel
|
||||
// A new push is only allowed to begin on the channel if there is a free
|
||||
// GPFIFO entry for it.
|
||||
NvU32 current_pushes_count;
|
||||
// Number of currently on-going gpfifo entries on this channel
|
||||
// A new push or control GPFIFO is only allowed to begin on the channel if
|
||||
// there is a free GPFIFO entry for it.
|
||||
NvU32 current_gpfifo_count;
|
||||
|
||||
// Array of uvm_push_info_t for all pending pushes on the channel
|
||||
uvm_push_info_t *push_infos;
|
||||
@@ -211,30 +213,10 @@ struct uvm_channel_struct
|
||||
// been marked as completed.
|
||||
struct list_head available_push_infos;
|
||||
|
||||
// GPU tracking semaphore tracking the work in the channel
|
||||
// GPU tracking semaphore tracking the work in the channel.
|
||||
// Each push on the channel increments the semaphore, see
|
||||
// uvm_channel_end_push().
|
||||
uvm_gpu_tracking_semaphore_t tracking_sem;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// RM channel information
|
||||
union
|
||||
@@ -331,10 +313,20 @@ struct uvm_channel_manager_struct
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
void uvm_channel_pool_lock(uvm_channel_pool_t *pool);
|
||||
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool);
|
||||
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
return uvm_channel_pool_is_proxy(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
@@ -343,14 +335,6 @@ static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Proxy channels are used to push page tree related methods, so their channel
|
||||
// type is UVM_CHANNEL_TYPE_MEMOPS.
|
||||
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
|
||||
@@ -437,8 +421,8 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
|
||||
uvm_gpu_t *dst_gpu,
|
||||
uvm_channel_t **channel_out);
|
||||
|
||||
// Reserve a specific channel for a push
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel);
|
||||
// Reserve a specific channel for a push or for a control GPFIFO entry.
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
|
||||
|
||||
// Set optimal CE for P2P transfers between manager->gpu and peer
|
||||
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
|
||||
@@ -451,6 +435,17 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push);
|
||||
// Should be used by uvm_push_end() only.
|
||||
void uvm_channel_end_push(uvm_push_t *push);
|
||||
|
||||
// Write/send a control GPFIFO to channel. This is not supported by proxy
|
||||
// channels.
|
||||
// Ordering guarantees:
|
||||
// Input: Control GPFIFO entries are guaranteed to be processed by ESCHED after
|
||||
// all prior GPFIFO entries and pushbuffers have been fetched, but not
|
||||
// necessarily completed.
|
||||
// Output ordering: A caller can wait for this control entry to complete with
|
||||
// uvm_channel_manager_wait(), or by waiting for any later push in the same
|
||||
// channel to complete.
|
||||
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value);
|
||||
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -60,7 +60,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
TEST_CHECK_RET(gpu != NULL);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, &mem);
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
|
||||
@@ -153,7 +153,6 @@ done:
|
||||
|
||||
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
@@ -306,7 +305,6 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uvm_push_t push;
|
||||
@@ -481,12 +479,14 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
|
||||
0,
|
||||
&stream->counter_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
TEST_SNAPSHOT_SIZE(iterations_per_stream),
|
||||
0,
|
||||
&stream->counter_snapshots_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
@@ -495,6 +495,7 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
TEST_SNAPSHOT_SIZE(iterations_per_stream),
|
||||
0,
|
||||
&stream->other_stream_counter_snapshots_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
@@ -565,6 +566,7 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
stream->counter_snapshots_mem,
|
||||
i,
|
||||
stream->queued_counter_repeat);
|
||||
|
||||
// Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
|
||||
stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
|
||||
set_counter(&stream->push,
|
||||
@@ -669,61 +671,215 @@ done:
    return status;
}

NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;

                if (uvm_channel_is_proxy(channel))
                    continue;

                // We submit 8x the channel's GPFIFO entries to force a few
                // complete loops in the GPFIFO circular buffer.
                for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
                    NvU64 entry;
                    gpu->parent->host_hal->set_gpfifo_noop(&entry);
                    TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
                }
            }
        }
    }

    return NV_OK;
}

NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;

                if (uvm_channel_is_proxy(channel))
                    continue;

                // We submit 8x the channel's GPFIFO entries to force a few
                // complete loops in the GPFIFO circular buffer.
                for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
                    if (i % 2 == 0) {
                        NvU64 entry;
                        gpu->parent->host_hal->set_gpfifo_noop(&entry);
                        TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
                    }
                    else {
                        TEST_NV_CHECK_RET(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl and push test"));
                        uvm_push_end(&push);
                    }
                }

                TEST_NV_CHECK_RET(uvm_push_wait(&push));
            }
        }
    }

    return NV_OK;
}

static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
{
    NvU32 pending_entries;

    uvm_channel_pool_lock(channel->pool);

    if (channel->cpu_put >= channel->gpu_get)
        pending_entries = channel->cpu_put - channel->gpu_get;
    else
        pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;

    uvm_channel_pool_unlock(channel->pool);

    return channel->num_gpfifo_entries - pending_entries - 1;
}

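// Worked example (illustrative, not part of the diff): with
// num_gpfifo_entries == 32, cpu_put == 3 and gpu_get == 28, the put pointer
// has wrapped, so pending_entries == 3 + 32 - 28 == 7 and the helper above
// reports 32 - 7 - 1 == 24 available entries; one slot is always kept free so
// a full ring can be distinguished from an empty one. The tight test below
// relies on the same accounting when it fills all but a handful of entries.
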
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
    uvm_channel_t *channel;
    uvm_rm_mem_t *mem;
    NvU32 *cpu_ptr;
    NvU64 gpu_va;
    NvU32 i;
    NvU64 entry;
    uvm_push_t push;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;
        gpu = manager->gpu;

        TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
        cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
        gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);

        *cpu_ptr = 0;

        // This semaphore acquire takes 1 GPFIFO entry.
        TEST_NV_CHECK_GOTO(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_GPU, &push, "gpfifo ctrl tight test acq"),
                           error);

        channel = push.channel;
        UVM_ASSERT(!uvm_channel_is_proxy(channel));

        gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
        uvm_push_end(&push);

        // Populate the remaining GPFIFO entries, leaving 2 slots available.
        // 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
        // indicate a terminal condition for the GPFIFO ring buffer, so we push
        // num_gpfifo_entries - 4.
        for (i = 0; i < channel->num_gpfifo_entries - 4; i++) {
            TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl tight test populate"), error);
            uvm_push_end(&push);
        }

        TEST_CHECK_GOTO(get_available_gpfifo_entries(channel) == 2, error);

        // We should have room for the control GPFIFO and the subsequent
        // semaphore release.
        gpu->parent->host_hal->set_gpfifo_noop(&entry);
        TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);

        // Release the semaphore.
        UVM_WRITE_ONCE(*cpu_ptr, 1);

        TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);

        uvm_rm_mem_free(mem);
    }

    return NV_OK;

error:
    uvm_rm_mem_free(mem);

    return status;
}

// This test is inspired by test_rc (above).
// The test recreates the GPU's channel manager, forcing its pushbuffer to be
// mapped on a non-zero 1TB segment. This exercises work submission from
// pushbuffers whose VAs are greater than 1TB.
static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager;
        uvm_channel_pool_t *pool;

        if (!uvm_gpu_has_pushbuffer_segments(gpu))
            continue;

        // The GPU channel manager pushbuffer is destroyed and then re-created
        // after testing a non-zero pushbuffer extension base, so this test
        // requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        gpu->uvm_test_force_upper_pushbuffer_segment = 1;
        uvm_channel_manager_destroy(gpu->channel_manager);
        TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
        gpu->uvm_test_force_upper_pushbuffer_segment = 0;

        manager = gpu->channel_manager;
        TEST_CHECK_GOTO(uvm_pushbuffer_get_gpu_va_base(manager->pushbuffer) >= (1ull << 40), error);

        // Submit a few pushes with the recently allocated
        // channel_manager->pushbuffer.
        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;

                for (i = 0; i < channel->num_gpfifo_entries; i++) {
                    TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "pushbuffer extension push test"),
                                       error);
                    uvm_push_end(&push);
                }

                TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
            }
        }
    }

    return NV_OK;

error:
    gpu->uvm_test_force_upper_pushbuffer_segment = 0;

    return status;
}

NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
{
@@ -737,11 +893,23 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
    if (status != NV_OK)
        goto done;

    status = test_write_ctrl_gpfifo_noop(va_space);
    if (status != NV_OK)
        goto done;

    status = test_write_ctrl_gpfifo_and_pushes(va_space);
    if (status != NV_OK)
        goto done;

    status = test_write_ctrl_gpfifo_tight(va_space);
    if (status != NV_OK)
        goto done;

    // The following tests have side effects: they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
    if (status != NV_OK)
        goto done;

    g_uvm_global.disable_fatal_error_assert = true;
    uvm_release_asserts_set_global_error_for_tests = true;

@@ -34,7 +34,7 @@ static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");

bool uvm_debug_prints_enabled()
bool uvm_debug_prints_enabled(void)
{
    return uvm_debug_prints != 0;
}

@@ -35,9 +35,6 @@ typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
|
||||
typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
|
||||
typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
|
||||
typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
|
||||
|
||||
|
||||
|
||||
typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
|
||||
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
|
||||
|
||||
@@ -71,12 +71,10 @@ static void uvm_unregister_callbacks(void)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void sev_init(const UvmPlatformInfo *platform_info)
|
||||
{
|
||||
g_uvm_global.sev_enabled = platform_info->sevEnabled;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_init(void)
|
||||
{
|
||||
@@ -127,9 +125,7 @@ NV_STATUS uvm_global_init(void)
|
||||
uvm_ats_init(&platform_info);
|
||||
g_uvm_global.num_simulated_devices = 0;
|
||||
|
||||
|
||||
|
||||
|
||||
sev_init(&platform_info);
|
||||
|
||||
status = uvm_gpu_init();
|
||||
if (status != NV_OK) {
|
||||
|
||||
@@ -143,12 +143,11 @@ struct uvm_global_struct
|
||||
struct page *page;
|
||||
} unload_state;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
|
||||
// enabled. This field is set once during global initialization
|
||||
// (uvm_global_init), and can be read afterwards without acquiring any
|
||||
// locks.
|
||||
bool sev_enabled;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
@@ -187,8 +186,7 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
|
||||
g_uvm_global.parent_gpus[gpu_index] = NULL;
|
||||
}
|
||||
|
||||
@@ -42,9 +42,6 @@
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_test.h"
|
||||
|
||||
|
||||
|
||||
|
||||
#include "uvm_linux.h"
|
||||
|
||||
#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
|
||||
@@ -95,12 +92,10 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
return UVM_GPU_LINK_NVLINK_2;
|
||||
case UVM_LINK_TYPE_NVLINK_3:
|
||||
return UVM_GPU_LINK_NVLINK_3;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
case UVM_LINK_TYPE_NVLINK_4:
|
||||
return UVM_GPU_LINK_NVLINK_4;
|
||||
case UVM_LINK_TYPE_C2C:
|
||||
return UVM_GPU_LINK_C2C;
|
||||
default:
|
||||
return UVM_GPU_LINK_INVALID;
|
||||
}
|
||||
@@ -356,11 +351,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
|
||||
|
||||
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 5);
|
||||
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
|
||||
|
||||
switch (link_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
|
||||
@@ -368,10 +359,8 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -519,12 +508,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
|
||||
gpu_info_print_ce_caps(gpu, s);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -711,7 +694,7 @@ static NV_STATUS init_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.dir);
|
||||
proc_remove(parent_gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -739,8 +722,8 @@ static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.access_counters_file);
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.fault_stats_file);
|
||||
proc_remove(parent_gpu->procfs.access_counters_file);
|
||||
proc_remove(parent_gpu->procfs.fault_stats_file);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
@@ -791,9 +774,9 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
// The kernel waits on readers to finish before returning from those calls
|
||||
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_peers);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_symlink);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir);
|
||||
proc_remove(gpu->procfs.dir_peers);
|
||||
proc_remove(gpu->procfs.dir_symlink);
|
||||
proc_remove(gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
@@ -807,15 +790,15 @@ static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
|
||||
static void deinit_procfs_files(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.info_file);
|
||||
proc_remove(gpu->procfs.info_file);
|
||||
}
|
||||
|
||||
static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[1]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_file[1]);
|
||||
}
|
||||
|
||||
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
@@ -1038,15 +1021,6 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
|
||||
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
|
||||
parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
|
||||
@@ -1208,16 +1182,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
status = init_procfs_files(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
@@ -1393,10 +1357,6 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
|
||||
// Sync all trackers in PMM
|
||||
uvm_pmm_gpu_sync(&gpu->pmm);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Remove all references to the given GPU from its parent, since it is being
|
||||
@@ -1530,13 +1490,6 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
if (free_parent)
|
||||
destroy_nvlink_peers(gpu);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO: Bug 2844714: If the parent is not being freed, the following
|
||||
// gpu_table_lock is only needed to protect concurrent
|
||||
// find_first_valid_gpu() in BH from the __clear_bit here. After
|
||||
@@ -2212,16 +2165,12 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_ASSERT(p2p_caps_params->p2pLink != UVM_LINK_TYPE_C2C);
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|
||||
|
||||
|
||||
|| peer_caps->link_type == UVM_GPU_LINK_C2C
|
||||
)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
@@ -3131,41 +3080,41 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_addr = dma_map_page(&gpu->parent->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&gpu->parent->pci_dev->dev, dma_addr))
|
||||
dma_addr = dma_map_page(&parent_gpu->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&parent_gpu->pci_dev->dev, dma_addr))
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
if (dma_addr < gpu->parent->dma_addressable_start ||
|
||||
dma_addr + size - 1 > gpu->parent->dma_addressable_limit) {
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_addr < parent_gpu->dma_addressable_start ||
|
||||
dma_addr + size - 1 > parent_gpu->dma_addressable_limit) {
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
UVM_ERR_PRINT_RL("PCI mapped range [0x%llx, 0x%llx) not in the addressable range [0x%llx, 0x%llx), GPU %s\n",
|
||||
dma_addr,
|
||||
dma_addr + (NvU64)size,
|
||||
gpu->parent->dma_addressable_start,
|
||||
gpu->parent->dma_addressable_limit + 1,
|
||||
uvm_gpu_name(gpu));
|
||||
parent_gpu->dma_addressable_start,
|
||||
parent_gpu->dma_addressable_limit + 1,
|
||||
parent_gpu->name);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
atomic64_add(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(gpu->parent, dma_addr);
|
||||
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size)
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
|
||||
{
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_address = gpu_addr_to_dma_addr(gpu->parent, dma_address);
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
// This function implements the UvmRegisterGpu API call, as described in uvm.h.
|
||||
|
||||
@@ -44,11 +44,9 @@
|
||||
#include "uvm_va_block_types.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_rb_tree.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "nv-kthread-q.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
|
||||
UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
|
||||
@@ -162,6 +160,12 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// State used by the VA block routines called by the servicing routine
|
||||
uvm_va_block_context_t block_context;
|
||||
|
||||
// Prefetch state hint
|
||||
uvm_perf_prefetch_hint_t prefetch_hint;
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
};
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
@@ -377,6 +381,16 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// determine at fetch time that all the access counter notifications in the
|
||||
// batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Scratch space, used to generate artificial physically addressed notifications.
|
||||
// Virtual address notifications are always aligned to 64k. This means up to 16
|
||||
// different physical locations could have been accessed to trigger one notification.
|
||||
// The sub-granularity mask can correspond to any of them.
|
||||
struct {
|
||||
uvm_processor_id_t resident_processors[16];
|
||||
uvm_gpu_phys_address_t phys_addresses[16];
|
||||
uvm_access_counter_buffer_entry_t phys_entry;
|
||||
} scratch;
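// (Illustrative note, not part of the diff: assuming 4K base pages, a
// 64K-aligned virtual notification can cover 64K / 4K == 16 pages, which is
// why the scratch arrays above hold 16 entries.)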
|
||||
} virt;
|
||||
|
||||
struct
|
||||
@@ -508,10 +522,8 @@ typedef enum
|
||||
UVM_GPU_LINK_NVLINK_1,
|
||||
UVM_GPU_LINK_NVLINK_2,
|
||||
UVM_GPU_LINK_NVLINK_3,
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_GPU_LINK_NVLINK_4,
|
||||
UVM_GPU_LINK_C2C,
|
||||
UVM_GPU_LINK_MAX
|
||||
} uvm_gpu_link_type_t;
|
||||
|
||||
@@ -684,10 +696,6 @@ struct uvm_gpu_struct
|
||||
// mappings (instead of kernel), and it is used in most configurations.
|
||||
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ECC handling
|
||||
// In order to trap ECC errors as soon as possible the driver has the hw
|
||||
// interrupt register mapped directly. If an ECC interrupt is ever noticed
|
||||
@@ -742,6 +750,9 @@ struct uvm_gpu_struct
|
||||
|
||||
// Placeholder for per-GPU performance heuristics information
|
||||
uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];
|
||||
|
||||
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
|
||||
bool uvm_test_force_upper_pushbuffer_segment;
|
||||
};
|
||||
|
||||
struct uvm_parent_gpu_struct
|
||||
@@ -823,9 +834,6 @@ struct uvm_parent_gpu_struct
|
||||
uvm_fault_buffer_hal_t *fault_buffer_hal;
|
||||
uvm_access_counter_buffer_hal_t *access_counter_buffer_hal;
|
||||
|
||||
|
||||
|
||||
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
// Virtualization mode of the GPU.
|
||||
@@ -1318,19 +1326,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
//
|
||||
// Returns the physical address of the pages that can be used to access them on
|
||||
// the GPU.
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
|
||||
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size);
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
|
||||
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
|
||||
{
|
||||
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
|
||||
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
|
||||
}
|
||||
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_gpu_t *gpu, NvU64 dma_address)
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
|
||||
{
|
||||
uvm_gpu_unmap_cpu_pages(gpu, dma_address, PAGE_SIZE);
|
||||
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
|
||||
}
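// Illustrative sketch (not part of the diff): with the reworked signatures
// above, callers now pass the parent GPU rather than a uvm_gpu_t. The helper
// below is hypothetical and only shows the intended calling pattern.
static NV_STATUS example_map_one_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page)
{
    NvU64 dma_addr;
    NV_STATUS status = uvm_gpu_map_cpu_page(parent_gpu, page, &dma_addr);

    if (status != NV_OK)
        return status;

    // ... the page is now addressable by the GPU at dma_addr ...

    uvm_gpu_unmap_cpu_page(parent_gpu, dma_addr);
    return NV_OK;
}
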
|
||||
|
||||
// Allocate and map a page of system DMA memory on the GPU for physical access
|
||||
@@ -1360,14 +1368,13 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
// addresses.
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
|
||||
|
||||
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
|
||||
{
|
||||
return gpu->parent->max_host_va > (1ull << 40);
|
||||
}
|
||||
|
||||
static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Eviction is supported only if the GPU supports replayable faults
|
||||
return gpu->parent->replayable_faults_supported;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -41,6 +41,10 @@
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
// granularity can be set to up to 16G, which would require an array too large
|
||||
@@ -934,25 +938,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
translate_virt_notifications_instance_ptrs(gpu, batch_context);
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
// TODO: Bug 1990466: Service virtual notifications. Entries with NULL
|
||||
// va_space are simply dropped.
|
||||
if (uvm_enable_builtin_tests) {
|
||||
NvU32 i;
|
||||
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
const bool on_managed = false;
|
||||
uvm_tools_broadcast_access_counter(gpu, batch_context->virt.notifications[i], on_managed);
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// GPA notifications provide a physical address and an aperture. Sort
|
||||
// accesses by aperture to try to coalesce operations on the same target
|
||||
// processor.
|
||||
@@ -1046,9 +1031,19 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
|
||||
}
|
||||
|
||||
// If the underlying VMA is gone, skip HMM migrations.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_find_vma(&service_context->block_context, address);
|
||||
if (status == NV_ERR_INVALID_ADDRESS)
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
|
||||
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
processor,
|
||||
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
|
||||
@@ -1158,7 +1153,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
bool *clear_counter)
|
||||
unsigned *out_flags)
|
||||
{
|
||||
size_t index;
|
||||
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
|
||||
@@ -1168,7 +1163,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
|
||||
gpu->id: UVM_ID_CPU;
|
||||
|
||||
*clear_counter = false;
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
UVM_ASSERT(num_reverse_mappings > 0);
|
||||
|
||||
@@ -1217,7 +1212,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
*clear_counter = true;
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
|
||||
done:
|
||||
@@ -1238,25 +1233,26 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
bool *clear_counter)
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t index;
|
||||
|
||||
*clear_counter = false;
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
bool clear_counter_local = false;
|
||||
unsigned out_flags_local = 0;
|
||||
status = service_phys_single_va_block(gpu,
|
||||
batch_context,
|
||||
current_entry,
|
||||
reverse_mappings + index,
|
||||
1,
|
||||
&clear_counter_local);
|
||||
&out_flags_local);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
*clear_counter = *clear_counter || clear_counter_local;
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
|
||||
*out_flags |= out_flags_local;
|
||||
}
|
||||
|
||||
// In the case of failure, drop the refcounts for the remaining reverse mappings
|
||||
@@ -1267,18 +1263,13 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
// Iterate over all regions set in the given sub_granularity mask
|
||||
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) \
|
||||
for ((region_start) = find_first_bit(&(sub_granularity), (config)->sub_granularity_regions_per_translation), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), \
|
||||
(config)->sub_granularity_regions_per_translation, \
|
||||
(region_start) + 1); \
|
||||
(region_start) < config->sub_granularity_regions_per_translation; \
|
||||
(region_start) = find_next_bit(&(sub_granularity), \
|
||||
(config)->sub_granularity_regions_per_translation, \
|
||||
(region_end) + 1), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), \
|
||||
(config)->sub_granularity_regions_per_translation, \
|
||||
(region_start) + 1))
|
||||
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
|
||||
for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1); \
|
||||
(region_start) < (num_regions); \
|
||||
(region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))
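// Illustrative usage (not part of the diff), mirroring the call site later in
// this change; the function name is hypothetical, and 'config' and
// 'current_entry' are assumed to have the types used elsewhere in this file.
static void example_iterate_accessed_regions(const uvm_gpu_access_counter_type_config_t *config,
                                             const uvm_access_counter_buffer_entry_t *current_entry)
{
    NvU32 region_start, region_end;
    unsigned long sub_granularity = current_entry->sub_granularity;

    for_each_sub_granularity_region(region_start,
                                    region_end,
                                    sub_granularity,
                                    config->sub_granularity_regions_per_translation) {
        // Each run of set bits [region_start, region_end) corresponds to
        // (region_end - region_start) contiguous sub-granularity regions of
        // the notification's tracked physical range.
    }
}
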
|
||||
|
||||
|
||||
static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
|
||||
{
|
||||
@@ -1309,7 +1300,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
NvU64 address,
|
||||
unsigned long sub_granularity,
|
||||
size_t *num_reverse_mappings,
|
||||
bool *clear_counter)
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 region_start, region_end;
|
||||
@@ -1318,7 +1309,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
// Get the reverse_map translations for all the regions set in the
|
||||
// sub_granularity field of the counter.
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) {
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
|
||||
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
|
||||
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
|
||||
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
|
||||
@@ -1350,7 +1341,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
clear_counter);
|
||||
out_flags);
|
||||
}
|
||||
else {
|
||||
status = service_phys_va_blocks(gpu,
|
||||
@@ -1358,7 +1349,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
clear_counter);
|
||||
out_flags);
|
||||
}
|
||||
|
||||
return status;
|
||||
@@ -1366,7 +1357,8 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 translation_index;
|
||||
@@ -1377,7 +1369,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
size_t total_reverse_mappings = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
bool clear_counter = false;
|
||||
unsigned flags = 0;
|
||||
|
||||
address = current_entry->address.address;
|
||||
UVM_ASSERT(address % config->translation_size == 0);
|
||||
@@ -1405,7 +1397,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
|
||||
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
|
||||
size_t num_reverse_mappings;
|
||||
bool clear_counter_local = false;
|
||||
unsigned out_flags_local = 0;
|
||||
status = service_phys_notification_translation(gpu,
|
||||
resident_gpu,
|
||||
batch_context,
|
||||
@@ -1414,9 +1406,11 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
address,
|
||||
sub_granularity,
|
||||
&num_reverse_mappings,
|
||||
&clear_counter_local);
|
||||
&out_flags_local);
|
||||
total_reverse_mappings += num_reverse_mappings;
|
||||
clear_counter = clear_counter || clear_counter_local;
|
||||
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
|
||||
flags |= out_flags_local;
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
@@ -1425,17 +1419,14 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
|
||||
}
|
||||
|
||||
// TODO: Bug 1990466: Here we already have virtual addresses and
|
||||
// address spaces. Merge virtual and physical notification handling
|
||||
|
||||
// Currently we only report events for our tests, not for tools
|
||||
if (uvm_enable_builtin_tests) {
|
||||
const bool on_managed = total_reverse_mappings != 0;
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, on_managed);
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
|
||||
}
|
||||
|
||||
if (status == NV_OK && clear_counter)
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -1450,11 +1441,18 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
|
||||
unsigned flags = 0;
|
||||
|
||||
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
|
||||
continue;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, current_entry);
|
||||
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
@@ -1462,6 +1460,191 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
|
||||
{
|
||||
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
|
||||
*(uvm_gpu_phys_address_t*)_b);
|
||||
}
|
||||
|
||||
static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
|
||||
{
|
||||
if (a.aperture != b.aperture)
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(is_power_of_2(granularity));
|
||||
|
||||
return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
|
||||
}
|
||||
|
||||
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
|
||||
NvU64 region_size,
|
||||
NvU64 sub_region_size,
|
||||
NvU32 accessed_mask)
|
||||
{
|
||||
const unsigned accessed_index = (address.address % region_size) / sub_region_size;
|
||||
|
||||
// accessed_mask is only filled for tracking granularities larger than 64K
|
||||
if (region_size == UVM_PAGE_SIZE_64K)
|
||||
return true;
|
||||
|
||||
UVM_ASSERT(accessed_index < 32);
|
||||
return ((1 << accessed_index) & accessed_mask) != 0;
|
||||
}
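// Worked example (illustrative, not part of the diff): with a 2MB tracking
// region (region_size) split into 64K sub-regions (sub_region_size), an access
// at offset 0x30000 within the region gives accessed_index == 0x30000 / 0x10000
// == 3, so the helper above reports the address as accessed only when bit 3 of
// accessed_mask is set.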
|
||||
|
||||
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU64 notification_size;
|
||||
NvU64 address;
|
||||
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
|
||||
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
|
||||
int num_addresses = 0;
|
||||
int i;
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
NvU64 region_start = current_entry->address.address;
|
||||
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
|
||||
|
||||
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_type_t counter_type = current_entry->counter_type;
|
||||
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
|
||||
|
||||
uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
|
||||
|
||||
UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
// Entries with NULL va_space are simply dropped.
|
||||
if (!va_space)
|
||||
return NV_OK;
|
||||
|
||||
status = config_granularity_to_bytes(config->rm.granularity, ¬ification_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Collect physical locations that could have been touched
|
||||
// in the reported 64K VA region. The notification mask can
|
||||
// correspond to any of them.
|
||||
uvm_va_space_down_read(va_space);
|
||||
for (address = region_start; address < region_end;) {
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
|
||||
address += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
while (address < va_block->end && address < region_end) {
|
||||
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
|
||||
|
||||
// UVM va_block always maps the closest resident location to processor
|
||||
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
|
||||
// Add physical location if it's valid and not local vidmem
|
||||
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
|
||||
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
|
||||
if (phys_address_in_accessed_sub_region(phys_address,
|
||||
notification_size,
|
||||
config->sub_granularity_region_size,
|
||||
current_entry->sub_granularity)) {
|
||||
resident_processors[num_addresses] = res_id;
|
||||
phys_addresses[num_addresses] = phys_address;
|
||||
++num_addresses;
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
|
||||
phys_address.address,
|
||||
uvm_aperture_string(phys_address.aperture),
|
||||
current_entry->sub_granularity);
|
||||
}
|
||||
}
|
||||
|
||||
address += PAGE_SIZE;
|
||||
}
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
}
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// The addresses need to be sorted to aid coalescing.
|
||||
sort(phys_addresses,
|
||||
num_addresses,
|
||||
sizeof(*phys_addresses),
|
||||
cmp_sort_gpu_phys_addr,
|
||||
NULL);
|
||||
|
||||
for (i = 0; i < num_addresses; ++i) {
|
||||
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
|
||||
|
||||
// Skip the current pointer if the physical region was already handled
|
||||
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
|
||||
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
|
||||
continue;
|
||||
}
|
||||
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
|
||||
i,
|
||||
num_addresses,
|
||||
phys_addresses[i].address,
|
||||
notification_size - 1,
|
||||
uvm_aperture_string(phys_addresses[i].aperture),
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
// Construct a fake phys addr AC entry
|
||||
fake_entry->counter_type = current_entry->counter_type;
|
||||
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
|
||||
fake_entry->address.aperture = phys_addresses[i].aperture;
|
||||
fake_entry->address.is_virtual = false;
|
||||
fake_entry->physical_info.resident_id = resident_processors[i];
|
||||
fake_entry->counter_value = current_entry->counter_value;
|
||||
fake_entry->sub_granularity = current_entry->sub_granularity;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
NV_STATUS status = NV_OK;
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
unsigned flags = 0;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
|
||||
|
||||
status = service_virt_notification(gpu, batch_context, current_entry, &flags);
|
||||
|
||||
UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
|
||||
i + 1,
|
||||
batch_context->virt.num_notifications,
|
||||
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
|
||||
status,
|
||||
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -1535,7 +1718,7 @@ bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
|
||||
return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_perf_access_counters_init()
|
||||
NV_STATUS uvm_perf_access_counters_init(void)
|
||||
{
|
||||
uvm_perf_module_init("perf_access_counters",
|
||||
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
|
||||
@@ -1546,7 +1729,7 @@ NV_STATUS uvm_perf_access_counters_init()
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_perf_access_counters_exit()
|
||||
void uvm_perf_access_counters_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -338,7 +338,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_processor_id_t new_residency;
|
||||
bool read_duplicate;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
@@ -365,8 +364,11 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
// Check logical permissions
|
||||
status = uvm_va_range_check_logical_permissions(va_range,
|
||||
status = uvm_va_block_check_logical_permissions(va_block,
|
||||
&service_context->block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
fault_entry->fault_access_type,
|
||||
uvm_range_group_address_migratable(va_space,
|
||||
fault_entry->fault_address));
|
||||
@@ -386,6 +388,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
gpu->id,
|
||||
fault_entry->access_type_mask,
|
||||
@@ -422,7 +425,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
@@ -432,7 +434,6 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context.mm = mm;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@@ -598,6 +599,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
// to remain valid until we release. If no mm is registered, we
|
||||
// can only service managed faults, not ATS/HMM faults.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
@@ -622,12 +624,11 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
|
||||
if (!fault_entry->is_fatal) {
|
||||
status = uvm_va_block_find_create(fault_entry->va_space,
|
||||
mm,
|
||||
fault_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK)
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
|
||||
else
|
||||
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -1055,13 +1055,17 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
|
||||
// - service_access_type: highest access type that can be serviced.
|
||||
static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
bool allow_migration)
|
||||
{
|
||||
NV_STATUS perm_status;
|
||||
|
||||
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
perm_status = uvm_va_block_check_logical_permissions(va_block,
|
||||
va_block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
fault_entry->fault_access_type,
|
||||
allow_migration);
|
||||
if (perm_status == NV_OK)
|
||||
@@ -1083,8 +1087,11 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
// service them before we can cancel the write/atomic faults. So we
|
||||
// retry with read fault access type.
|
||||
if (uvm_fault_access_type_mask_test(fault_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
perm_status = uvm_va_block_check_logical_permissions(va_block,
|
||||
va_block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
allow_migration);
|
||||
if (perm_status == NV_OK)
|
||||
@@ -1156,14 +1163,16 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address >= va_block->start);
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
|
||||
|
||||
end = va_block->end;
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_find_policy_end(va_block,
|
||||
&block_context->block_context,
|
||||
ordered_fault_cache[first_fault_index]->fault_address,
|
||||
&end);
|
||||
else
|
||||
}
|
||||
else {
|
||||
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
end = va_block->end;
|
||||
}
|
||||
|
||||
// Scan the sorted array and notify the fault event for all fault entries
|
||||
// in the block
|
||||
@@ -1226,7 +1235,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
UVM_ASSERT(iter.start <= current_entry->fault_address && iter.end >= current_entry->fault_address);
|
||||
|
||||
service_access_type = check_fault_access_permissions(gpu, va_block, current_entry, iter.migratable);
|
||||
service_access_type = check_fault_access_permissions(gpu,
|
||||
va_block,
|
||||
&block_context->block_context,
|
||||
current_entry,
|
||||
iter.migratable);
|
||||
|
||||
// Do not exit early due to logical errors such as access permission
|
||||
// violation.
|
||||
@@ -1269,6 +1282,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&block_context->block_context,
|
||||
page_index,
|
||||
gpu->id,
|
||||
service_access_type_mask,
|
||||
@@ -1348,7 +1362,6 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
// See the comments for function service_fault_batch_block_locked for
|
||||
// implementation details and error codes.
|
||||
static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
NvU32 first_fault_index,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
@@ -1361,7 +1374,6 @@ static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
|
||||
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
|
||||
fault_block_context->num_retries = 0;
|
||||
fault_block_context->block_context.mm = mm;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@@ -1531,6 +1543,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
// to remain valid until we release. If no mm is registered, we
|
||||
// can only service managed faults, not ATS/HMM faults.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
@@ -1576,13 +1589,11 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
|
||||
// don't do an unconditional VA range lookup for every ATS fault.
|
||||
status = uvm_va_block_find_create(va_space,
|
||||
mm,
|
||||
current_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK) {
|
||||
status = service_batch_managed_faults_in_block(gpu_va_space->gpu,
|
||||
mm,
|
||||
va_block,
|
||||
i,
|
||||
batch_context,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -118,6 +118,13 @@ static bool is_canary(NvU32 val)
|
||||
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
|
||||
}
|
||||
|
||||
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
|
||||
// pool?
|
||||
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
|
||||
{
|
||||
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
|
||||
}
|
||||
|
||||
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@@ -134,10 +141,17 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
|
||||
pool_page->pool = pool;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, UVM_RM_MEM_TYPE_SYS, UVM_SEMAPHORE_PAGE_SIZE, &pool_page->memory);
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
0,
|
||||
&pool_page->memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
// Verify the GPU can access the semaphore pool.
|
||||
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
|
||||
|
||||
// All semaphores are initially free
|
||||
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
|
||||
@@ -321,7 +335,7 @@ NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
|
||||
list_for_each_entry(page, &pool->pages, all_pages_node) {
|
||||
status = uvm_rm_mem_map_gpu(page->memory, gpu);
|
||||
status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -41,10 +41,8 @@
|
||||
#include "clc6b5.h"
|
||||
#include "clc56f.h"
|
||||
#include "clc7b5.h"
|
||||
|
||||
|
||||
|
||||
|
||||
#include "clc86f.h"
|
||||
#include "clc8b5.h"
|
||||
|
||||
#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
|
||||
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
|
||||
@@ -52,9 +50,6 @@
|
||||
#define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
|
||||
#define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
|
||||
|
||||
|
||||
|
||||
|
||||
// Table for copy engine functions.
|
||||
// Each entry is associated with a copy engine class through the 'class' field.
|
||||
// By setting the 'parent_class' field, a class will inherit the parent class's
|
||||
@@ -133,22 +128,20 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.memset_validate = uvm_hal_ce_memset_validate_stub,
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
.id = HOPPER_DMA_COPY_A,
|
||||
.parent_id = AMPERE_DMA_COPY_B,
|
||||
.u.ce_ops = {
|
||||
.semaphore_release = uvm_hal_hopper_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_hopper_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
|
||||
.offset_out = uvm_hal_hopper_ce_offset_out,
|
||||
.offset_in_out = uvm_hal_hopper_ce_offset_in_out,
|
||||
.memset_1 = uvm_hal_hopper_ce_memset_1,
|
||||
.memset_4 = uvm_hal_hopper_ce_memset_4,
|
||||
.memset_8 = uvm_hal_hopper_ce_memset_8,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
// Table for GPFIFO functions. Same idea as the copy engine table.
|
||||
@@ -171,6 +164,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.semaphore_release = uvm_hal_maxwell_host_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
|
||||
.set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
|
||||
.set_gpfifo_noop = uvm_hal_maxwell_host_set_gpfifo_noop,
|
||||
.set_gpfifo_pushbuffer_segment_base = uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported,
|
||||
.write_gpu_put = uvm_hal_maxwell_host_write_gpu_put,
|
||||
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f,
|
||||
.tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va,
|
||||
@@ -249,23 +244,23 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
|
||||
}
|
||||
},
{
|
||||
.id = HOPPER_CHANNEL_GPFIFO_A,
|
||||
.parent_id = AMPERE_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.sw_method_validate = uvm_hal_method_validate_stub,
|
||||
.semaphore_acquire = uvm_hal_hopper_host_semaphore_acquire,
|
||||
.semaphore_release = uvm_hal_hopper_host_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_hopper_host_semaphore_timestamp,
|
||||
.tlb_invalidate_all = uvm_hal_hopper_host_tlb_invalidate_all,
|
||||
.tlb_invalidate_va = uvm_hal_hopper_host_tlb_invalidate_va,
|
||||
.tlb_invalidate_test = uvm_hal_hopper_host_tlb_invalidate_test,
|
||||
.cancel_faults_va = uvm_hal_hopper_cancel_faults_va,
|
||||
.set_gpfifo_entry = uvm_hal_hopper_host_set_gpfifo_entry,
|
||||
.set_gpfifo_pushbuffer_segment_base = uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t arch_table[] =
|
||||
@@ -326,43 +321,23 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_ada_arch_init_properties,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_hopper_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_hopper,
|
||||
.mmu_engine_id_to_type = uvm_hal_hopper_mmu_engine_id_to_type,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_hopper_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
@@ -430,33 +405,18 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.u.fault_buffer_ops = {}
|
||||
},
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.fault_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.fault_buffer_ops = {
|
||||
.get_ve_id = uvm_hal_hopper_fault_buffer_get_ve_id,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
@@ -509,105 +469,18 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
};
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
|
||||
{
|
||||
NvLength i;
|
||||
@@ -711,17 +584,6 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -772,16 +634,6 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
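The per-architecture tables above pair each hardware class (id) with a parent_id, so a newer class such as HOPPER_DMA_COPY_A only has to list the operations that differ from its Ampere parent; per the table comment, uvm_hal_init_table() resolves the remaining operations through the parent chain. A minimal sketch of the id lookup such tables rely on (hypothetical helper name):

```c
// Illustrative sketch: search a class-ops table such as ce_table or
// host_table above by hardware class id. The parent_id field is what lets
// init-time code fill a class's unset function pointers from its parent
// entry, which is why the Hopper rows above are so short.
static uvm_hal_class_ops_t *example_find_class_ops(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
{
    NvU32 i;

    for (i = 0; i < row_count; i++) {
        if (table[i].id == id)
            return &table[i];
    }

    return NULL;
}
```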
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -41,10 +41,6 @@ void uvm_hal_maxwell_ce_init(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
|
||||
void uvm_hal_pascal_host_init(uvm_push_t *push);
// Host method validation
|
||||
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
@@ -118,12 +114,10 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
|
||||
// Issue a TLB invalidate applying to the specified VA range in a PDB.
|
||||
//
|
||||
@@ -183,15 +177,13 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar);
|
||||
|
||||
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
@@ -205,11 +197,9 @@ void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
|
||||
// By default all semaphore release operations include a membar sys before the
|
||||
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
|
||||
@@ -217,18 +207,10 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
// Release a semaphore including a timestamp at the specific GPU VA.
|
||||
//
|
||||
@@ -238,31 +220,28 @@ void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
|
||||
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
|
||||
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);
|
||||
|
||||
typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
|
||||
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
|
||||
@@ -277,16 +256,12 @@ NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
|
||||
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
|
||||
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
|
||||
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);
|
||||
|
||||
typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
|
||||
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
@@ -354,11 +329,9 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
|
||||
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
|
||||
// Increments the semaphore by 1, or resets to 0 if the incremented value would
|
||||
// exceed the payload.
|
||||
@@ -369,9 +342,7 @@ void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value,
|
||||
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
// Initialize GPU architecture dependent properties
|
||||
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -380,12 +351,8 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Retrieve the page-tree HAL for a given big page size
|
||||
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
|
||||
@@ -396,9 +363,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
|
||||
|
||||
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
|
||||
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -412,18 +377,14 @@ uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mm
|
||||
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
|
||||
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
|
||||
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
|
||||
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
|
||||
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
|
||||
|
||||
|
||||
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
|
||||
// Replayable faults
|
||||
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -477,9 +438,7 @@ void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
|
||||
|
||||
|
||||
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
|
||||
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
@@ -529,12 +488,10 @@ void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode);
|
||||
|
||||
typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
|
||||
uvm_user_channel_t *user_channel,
|
||||
@@ -619,39 +576,6 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
struct uvm_host_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
@@ -666,6 +590,8 @@ struct uvm_host_hal_struct
|
||||
uvm_hal_semaphore_acquire_t semaphore_acquire;
|
||||
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
|
||||
uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
|
||||
uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
|
||||
uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
|
||||
uvm_hal_host_write_gpu_put_t write_gpu_put;
|
||||
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
|
||||
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
|
||||
@@ -742,17 +668,6 @@ struct uvm_access_counter_buffer_hal_struct
|
||||
uvm_hal_access_counter_buffer_entry_size_t entry_size;
|
||||
};
typedef struct
|
||||
{
|
||||
// id is either a hardware class or GPU architecture
|
||||
@@ -775,10 +690,6 @@ typedef struct
|
||||
// access_counter_buffer_ops: id is an architecture
|
||||
uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
} u;
|
||||
} uvm_hal_class_ops_t;
|
||||
|
||||
|
||||
@@ -31,7 +31,6 @@ MODULE_PARM_DESC(uvm_disable_hmm,
|
||||
"HMM is not supported in the driver, or if ATS settings "
|
||||
"conflict with HMM.");
|
||||
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
#include <linux/hmm.h>
|
||||
@@ -47,6 +46,7 @@ MODULE_PARM_DESC(uvm_disable_hmm,
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_va_policy.h"
|
||||
#include "uvm_tools.h"
|
||||
|
||||
bool uvm_hmm_is_enabled_system_wide(void)
|
||||
{
|
||||
@@ -97,6 +97,9 @@ NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
|
||||
if (!uvm_hmm_is_enabled_system_wide() || !mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
if (va_space->initialization_flags & UVM_INIT_FLAGS_DISABLE_HMM)
|
||||
return NV_ERR_INVALID_STATE;
|
||||
|
||||
uvm_assert_mmap_lock_locked_write(mm);
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
@@ -180,12 +183,19 @@ static bool hmm_invalidate(uvm_va_block_t *va_block,
|
||||
mmu_interval_set_seq(mni, cur_seq);
|
||||
|
||||
// Note: unmap_vmas() does MMU_NOTIFY_UNMAP [0, 0xffffffffffffffff]
|
||||
// Also note that hmm_invalidate() can be called when a new va_block is not
|
||||
// yet inserted into the va_space->hmm.blocks table while the original
|
||||
// va_block is being split. The original va_block may have its end address
|
||||
// updated before the mmu interval notifier is updated so this invalidate
|
||||
// may be for a range past the va_block end address.
|
||||
start = range->start;
|
||||
end = (range->end == ULONG_MAX) ? range->end : range->end - 1;
|
||||
if (start < va_block->start)
|
||||
start = va_block->start;
|
||||
if (end > va_block->end)
|
||||
end = va_block->end;
|
||||
if (start > end)
|
||||
goto unlock;
|
||||
|
||||
if (range->event == MMU_NOTIFY_UNMAP)
|
||||
uvm_va_policy_clear(va_block, start, end);
|
||||
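The clamping logic above converts the notifier range's exclusive end to an inclusive one and intersects it with the va_block bounds, skipping the update when the intersection is empty. A standalone sketch of that inclusive-interval intersection (hypothetical helper name):

```c
// Sketch: clamp an inclusive [*start, *end] interval to [block_start, block_end];
// returns false when the two intervals do not overlap at all.
static bool example_clamp_to_block(NvU64 *start, NvU64 *end, NvU64 block_start, NvU64 block_end)
{
    if (*start < block_start)
        *start = block_start;
    if (*end > block_end)
        *end = block_end;

    return *start <= *end;
}
```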
@@ -267,6 +277,7 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
UVM_ASSERT(mm);
|
||||
UVM_ASSERT(!va_block_context || va_block_context->mm == mm);
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
UVM_ASSERT(PAGE_ALIGNED(addr));
|
||||
@@ -295,11 +306,13 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
// a maximum interval that doesn't overlap any existing UVM va_ranges.
|
||||
// We know that 'addr' is not within a va_range or
|
||||
// hmm_va_block_find_create() wouldn't be called.
|
||||
uvm_range_tree_adjust_interval(&va_space->va_range_tree, addr, &start, &end);
|
||||
status = uvm_range_tree_find_hole_in(&va_space->va_range_tree, addr, &start, &end);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
// Search for existing HMM va_blocks in the start/end interval and create
|
||||
// a maximum interval that doesn't overlap any existing HMM va_blocks.
|
||||
uvm_range_tree_adjust_interval(&va_space->hmm.blocks, addr, &start, &end);
|
||||
status = uvm_range_tree_find_hole_in(&va_space->hmm.blocks, addr, &start, &end);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
// Create a HMM va_block with a NULL va_range pointer.
|
||||
status = uvm_va_block_create(NULL, start, end, &va_block);
|
||||
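hmm_va_block_find_create() above computes the largest interval around addr that overlaps neither a managed va_range nor an existing HMM va_block, by narrowing the candidate bounds with two hole lookups. A hedged sketch of that pattern, assuming uvm_range_tree_find_hole_in() clips the in/out [*start, *end] bounds to the hole containing addr (in the real code the initial bounds come from the maximum span the new block may cover):

```c
// Sketch of the two-step narrowing done above: first exclude all managed
// va_ranges, then exclude all existing HMM va_blocks, so the resulting
// [*start, *end] is the widest interval containing addr that overlaps neither.
static void example_narrow_block_bounds(uvm_va_space_t *va_space, NvU64 addr, NvU64 *start, NvU64 *end)
{
    NV_STATUS status;

    status = uvm_range_tree_find_hole_in(&va_space->va_range_tree, addr, start, end);
    UVM_ASSERT(status == NV_OK);

    status = uvm_range_tree_find_hole_in(&va_space->hmm.blocks, addr, start, end);
    UVM_ASSERT(status == NV_OK);
}
```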
@@ -322,10 +335,7 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
}
|
||||
|
||||
status = uvm_range_tree_add(&va_space->hmm.blocks, &va_block->hmm.node);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT(status != NV_ERR_UVM_ADDRESS_IN_USE);
|
||||
goto err_unreg;
|
||||
}
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
done:
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
@@ -334,9 +344,6 @@ done:
|
||||
*va_block_ptr = va_block;
|
||||
return NV_OK;
|
||||
|
||||
err_unreg:
|
||||
mmu_interval_notifier_remove(&va_block->hmm.notifier);
|
||||
|
||||
err_release:
|
||||
uvm_va_block_release(va_block);
|
||||
|
||||
@@ -353,10 +360,67 @@ NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
{
|
||||
struct mm_struct *mm = va_block_context->mm;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (!mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
vma = find_vma(mm, addr);
|
||||
if (!uvm_hmm_vma_is_valid(vma, addr, false))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
va_block_context->hmm.vma = vma;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
UVM_ASSERT(vma->vm_start <= uvm_va_block_region_start(va_block, region));
|
||||
UVM_ASSERT(vma->vm_end > uvm_va_block_region_end(va_block, region));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
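uvm_hmm_find_vma() and uvm_hmm_va_block_context_vma_is_valid() above are meant to be used together: the VMA is cached in the block context before the block lock is taken, and the assertion then checks that it still covers the region being operated on. A hypothetical usage sketch (not from the diff):

```c
// Hypothetical caller: assumes ctx->mm is retained and its mmap_lock is held
// for at least read, per the locking comments on the helpers above.
static NV_STATUS example_prepare_hmm_access(uvm_va_block_t *va_block,
                                            uvm_va_block_context_t *ctx,
                                            NvU64 addr,
                                            uvm_va_block_region_t region)
{
    NV_STATUS status = uvm_hmm_find_vma(ctx, addr);

    if (status != NV_OK)
        return status;

    uvm_mutex_lock(&va_block->lock);
    UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block, ctx, region));
    // ... operate on the region while the cached VMA is known to cover it ...
    uvm_mutex_unlock(&va_block->lock);

    return NV_OK;
}
```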
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
uvm_va_block_test_t *block_test;
|
||||
uvm_va_block_t *va_block;
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
status = hmm_va_block_find_create(va_space, addr, false, NULL, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
block_test = uvm_va_block_get_test(va_block);
|
||||
if (block_test)
|
||||
block_test->inject_split_error = true;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
struct mmu_interval_notifier notifier;
|
||||
uvm_va_block_t *existing_block;
|
||||
uvm_va_block_t *new_block;
|
||||
} hmm_split_invalidate_data_t;
|
||||
|
||||
static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
|
||||
@@ -364,14 +428,9 @@ static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
hmm_split_invalidate_data_t *split_data = container_of(mni, hmm_split_invalidate_data_t, notifier);
|
||||
uvm_va_block_t *existing_block = split_data->existing_block;
|
||||
uvm_va_block_t *new_block = split_data->new_block;
|
||||
|
||||
if (uvm_ranges_overlap(existing_block->start, existing_block->end, range->start, range->end - 1))
|
||||
hmm_invalidate(existing_block, range, cur_seq);
|
||||
|
||||
if (uvm_ranges_overlap(new_block->start, new_block->end, range->start, range->end - 1))
|
||||
hmm_invalidate(new_block, range, cur_seq);
|
||||
uvm_tools_test_hmm_split_invalidate(split_data->existing_block->hmm.va_space);
|
||||
hmm_invalidate(split_data->existing_block, range, cur_seq);
|
||||
|
||||
return true;
|
||||
}
|
||||
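The split-invalidate callback above now forwards the notifier range only to the block(s) it actually overlaps, comparing inclusive block bounds against the notifier's end converted to inclusive (range->end - 1). A sketch of the inclusive overlap test the uvm_ranges_overlap() calls imply (hypothetical helper name):

```c
// Two inclusive intervals [a_start, a_end] and [b_start, b_end] overlap
// exactly when each one starts no later than the other one ends.
static bool example_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_start, NvU64 b_end)
{
    return a_start <= b_end && b_start <= a_end;
}
```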
@@ -405,6 +464,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
uvm_va_space_t *va_space = va_block->hmm.va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
hmm_split_invalidate_data_t split_data;
|
||||
NvU64 delay_us;
|
||||
uvm_va_block_t *new_va_block;
|
||||
NV_STATUS status;
|
||||
int ret;
|
||||
@@ -420,22 +480,23 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
return status;
|
||||
|
||||
// Initialize the newly created HMM va_block.
|
||||
new_va_block->hmm.node.start = new_va_block->start;
|
||||
new_va_block->hmm.node.end = new_va_block->end;
|
||||
new_va_block->hmm.va_space = va_space;
|
||||
uvm_range_tree_init(&new_va_block->hmm.va_policy_tree);
|
||||
|
||||
// The MMU interval notifier has to be removed in order to resize it.
|
||||
// That means there would be a window of time where invalidation callbacks
|
||||
// could be missed. To handle this case, we register a temporary notifier
|
||||
// to cover the same address range while resizing the old notifier (it is
|
||||
// OK to have multiple notifiers for the same range, we may simply try to
|
||||
// invalidate twice).
|
||||
split_data.existing_block = va_block;
|
||||
split_data.new_block = new_va_block;
|
||||
ret = mmu_interval_notifier_insert(&split_data.notifier,
|
||||
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
|
||||
mm,
|
||||
va_block->start,
|
||||
new_va_block->end - va_block->start + 1,
|
||||
&hmm_notifier_split_ops);
|
||||
new_va_block->start,
|
||||
uvm_va_block_size(new_va_block),
|
||||
&uvm_hmm_notifier_ops);
|
||||
|
||||
// Since __mmu_notifier_register() was called when the va_space was
|
||||
// initially created, we know that mm->notifier_subscriptions is valid
|
||||
// and mmu_interval_notifier_insert() can't return ENOMEM.
|
||||
// The only error return is for start + length overflowing but we already
|
||||
// registered the same address range before so there should be no error.
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@@ -445,40 +506,38 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
// Since __mmu_notifier_register() was called when the va_space was
|
||||
// initially created, we know that mm->notifier_subscriptions is valid
|
||||
// and mmu_interval_notifier_insert() can't return ENOMEM.
|
||||
// The only error return is for start + length overflowing but we already
|
||||
// registered the same address range before so there should be no error.
|
||||
// The MMU interval notifier has to be removed in order to resize it.
|
||||
// That means there would be a window of time when invalidation callbacks
|
||||
// could be missed. To handle this case, we register a temporary notifier
|
||||
// to cover the address range while resizing the old notifier (it is
|
||||
// OK to have multiple notifiers for the same range, we may simply try to
|
||||
// invalidate twice).
|
||||
split_data.existing_block = va_block;
|
||||
ret = mmu_interval_notifier_insert(&split_data.notifier,
|
||||
mm,
|
||||
va_block->start,
|
||||
new_end - va_block->start + 1,
|
||||
&hmm_notifier_split_ops);
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
mmu_interval_notifier_remove(&va_block->hmm.notifier);
|
||||
// Delay to allow hmm_sanity test to trigger an mmu_notifier during the
|
||||
// critical window where the split invalidate callback is active.
|
||||
delay_us = atomic64_read(&va_space->test.split_invalidate_delay_us);
|
||||
if (delay_us)
|
||||
udelay(delay_us);
|
||||
|
||||
uvm_range_tree_shrink_node(&va_space->hmm.blocks, &va_block->hmm.node, va_block->start, va_block->end);
|
||||
mmu_interval_notifier_remove(&va_block->hmm.notifier);
|
||||
|
||||
// Enable notifications on the old block with the smaller size.
|
||||
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
|
||||
mm,
|
||||
va_block->start,
|
||||
va_block->end - va_block->start + 1,
|
||||
&uvm_hmm_notifier_ops);
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
new_va_block->hmm.node.start = new_va_block->start;
|
||||
new_va_block->hmm.node.end = new_va_block->end;
|
||||
|
||||
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
|
||||
mm,
|
||||
new_va_block->start,
|
||||
new_va_block->end - new_va_block->start + 1,
|
||||
uvm_va_block_size(va_block),
|
||||
&uvm_hmm_notifier_ops);
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
mmu_interval_notifier_remove(&split_data.notifier);
|
||||
|
||||
status = uvm_range_tree_add(&va_space->hmm.blocks, &new_va_block->hmm.node);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (new_block_ptr)
|
||||
*new_block_ptr = new_va_block;
|
||||
|
||||
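The comments above describe a small protocol for resizing an interval notifier without losing invalidations: a temporary notifier covers the original range while the block's own notifier is removed and re-inserted with its shrunk size. A hedged sketch of that sequence with error handling omitted (the example_* names are hypothetical; the mmu_interval_notifier_* calls are the upstream Linux API used in this diff):

```c
// Sketch of the resize sequence: the temporary notifier keeps invalidations
// flowing while the block's notifier is briefly absent.
static int example_resize_notifier_for_split(struct mmu_interval_notifier *temp,
                                             struct mmu_interval_notifier *block_notifier,
                                             struct mm_struct *mm,
                                             unsigned long old_start,
                                             unsigned long old_len,
                                             unsigned long new_len,
                                             const struct mmu_interval_notifier_ops *ops)
{
    int ret;

    // 1) Temporary cover over the full original range.
    ret = mmu_interval_notifier_insert(temp, mm, old_start, old_len, ops);
    if (ret)
        return ret;

    // 2) Remove and 3) re-insert the block's notifier with the smaller range.
    mmu_interval_notifier_remove(block_notifier);
    ret = mmu_interval_notifier_insert(block_notifier, mm, old_start, new_len, ops);

    // 4) Drop the temporary cover once the resized notifier is live again.
    mmu_interval_notifier_remove(temp);
    return ret;
}
```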
@@ -486,7 +545,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
|
||||
err:
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
mmu_interval_notifier_remove(&split_data.notifier);
|
||||
mmu_interval_notifier_remove(&new_va_block->hmm.notifier);
|
||||
uvm_va_block_release(new_va_block);
|
||||
return status;
|
||||
}
|
||||
@@ -537,9 +596,9 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
|
||||
// page tables. However, it doesn't destroy the va_block because that would
|
||||
// require calling mmu_interval_notifier_remove() which can't be called from
|
||||
// the invalidate callback due to Linux locking constraints. If a process
|
||||
// calls mmap()/munmap() for SAM and then creates a UVM managed allocation,
|
||||
// calls mmap()/munmap() for SAM and then creates a managed allocation,
|
||||
// the same VMA range can be picked and there would be a UVM/HMM va_block
|
||||
// conflict. Creating a UVM managed allocation (or other va_range) calls this
|
||||
// conflict. Creating a managed allocation (or other va_range) calls this
|
||||
// function to remove stale HMM va_blocks or split the HMM va_block so there
|
||||
// is no overlap.
|
||||
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
@@ -586,6 +645,18 @@ NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
|
||||
{
|
||||
uvm_va_space_t *va_space = existing_va_block->hmm.va_space;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(existing_va_block));
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
uvm_range_tree_split(&existing_va_block->hmm.va_space->hmm.blocks,
|
||||
&existing_va_block->hmm.node,
|
||||
&new_block->hmm.node);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_policy_is_split_needed_t split_needed_cb,
|
||||
@@ -734,7 +805,7 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
uvm_va_policy_node_t *node;
|
||||
NvU64 end = *endp;
|
||||
NvU64 end = va_block->end;
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
@@ -748,8 +819,9 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
if (end > node->node.end)
|
||||
end = node->node.end;
|
||||
}
|
||||
else
|
||||
else {
|
||||
va_block_context->policy = &uvm_va_policy_default;
|
||||
}
|
||||
|
||||
*endp = end;
|
||||
}
|
||||
@@ -761,7 +833,7 @@ NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long addr;
|
||||
NvU64 end = va_block->end;
|
||||
NvU64 end;
|
||||
uvm_page_index_t outer;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
@@ -785,6 +857,241 @@ NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *block_context)
|
||||
{
|
||||
uvm_va_policy_t *policy;
|
||||
uvm_va_policy_node_t *node;
|
||||
uvm_va_block_region_t region;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) {
|
||||
block_context->policy = policy;
|
||||
|
||||
// Unmap may split PTEs and require a retry. Needs to be called
|
||||
// before the pinned pages information is destroyed.
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
NULL,
|
||||
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
|
||||
block_context,
|
||||
region));
|
||||
|
||||
uvm_perf_thrashing_info_destroy(va_block);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
|
||||
uvm_range_tree_node_t *node, *next;
|
||||
uvm_va_block_t *va_block;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return NV_OK;
|
||||
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
uvm_range_tree_for_each_safe(node, next, &va_space->hmm.blocks) {
|
||||
va_block = hmm_va_block_from_node(node);
|
||||
|
||||
status = hmm_clear_thrashing_policy(va_block, block_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
NvU64 start, end;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
|
||||
// We need to limit the prefetch region to the VMA.
|
||||
start = max(va_block->start, (NvU64)vma->vm_start);
|
||||
end = min(va_block->end, (NvU64)vma->vm_end - 1);
|
||||
|
||||
// Also, we need to limit the prefetch region to the policy range.
|
||||
if (policy == &uvm_va_policy_default) {
|
||||
NV_STATUS status = uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree,
|
||||
address,
|
||||
&start,
|
||||
&end);
|
||||
// We already know the hole exists and covers the fault region.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
else {
|
||||
uvm_va_policy_node_t *node = uvm_va_policy_node_from_policy(policy);
|
||||
|
||||
start = max(start, node->node.start);
|
||||
end = min(end, node->node.end);
|
||||
}
|
||||
|
||||
return uvm_va_block_region_from_start_end(va_block, start, end);
|
||||
}
|
||||
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
UVM_ASSERT(vma && addr >= vma->vm_start && addr < vma->vm_end);
|
||||
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
return UVM_PROT_NONE;
|
||||
else if (!(vma->vm_flags & VM_WRITE))
|
||||
return UVM_PROT_READ_ONLY;
|
||||
else
|
||||
return UVM_PROT_READ_WRITE_ATOMIC;
|
||||
}
|
||||
|
||||
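uvm_hmm_compute_logical_prot() above derives the UVM protection level purely from the VMA flags. The mapping it implements is small enough to summarize in standalone form (hypothetical helper name):

```c
// Same mapping as above, standalone: no read access means no protection at
// all, read without write is read-only, and writability is treated as
// read/write/atomic.
static uvm_prot_t example_prot_from_vm_flags(unsigned long vm_flags)
{
    if (!(vm_flags & VM_READ))
        return UVM_PROT_NONE;

    if (!(vm_flags & VM_WRITE))
        return UVM_PROT_READ_ONLY;

    return UVM_PROT_READ_WRITE_ATOMIC;
}
```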
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
|
||||
atomic64_set(&va_space->test.split_invalidate_delay_us, params->delay_us);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
struct mm_struct *mm;
|
||||
NV_STATUS status;
|
||||
|
||||
mm = uvm_va_space_mm_or_current_retain(va_space);
|
||||
if (!mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
uvm_down_write_mmap_lock(mm);
|
||||
uvm_va_space_down_write(va_space);
|
||||
if (va_space->hmm.disable)
|
||||
status = uvm_hmm_va_space_initialize_test(va_space);
|
||||
else
|
||||
status = NV_OK;
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
|
||||
{
|
||||
uvm_range_tree_node_t *tree_node;
|
||||
uvm_va_policy_node_t *node;
|
||||
struct vm_area_struct *vma;
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
if (!mm || !uvm_hmm_is_enabled(va_space))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
params->type = UVM_TEST_VA_RANGE_TYPE_MANAGED;
|
||||
params->managed.subtype = UVM_TEST_RANGE_SUBTYPE_HMM;
|
||||
params->va_range_start = 0;
|
||||
params->va_range_end = ULONG_MAX;
|
||||
params->read_duplication = UVM_TEST_READ_DUPLICATION_UNSET;
|
||||
memset(&params->preferred_location, 0, sizeof(params->preferred_location));
|
||||
params->accessed_by_count = 0;
|
||||
params->managed.vma_start = 0;
|
||||
params->managed.vma_end = 0;
|
||||
params->managed.is_zombie = NV_FALSE;
|
||||
params->managed.owned_by_calling_process = (mm == current->mm ? NV_TRUE : NV_FALSE);
|
||||
|
||||
vma = find_vma(mm, params->lookup_address);
|
||||
if (!uvm_hmm_vma_is_valid(vma, params->lookup_address, false))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
params->va_range_start = vma->vm_start;
|
||||
params->va_range_end = vma->vm_end - 1;
|
||||
params->managed.vma_start = vma->vm_start;
|
||||
params->managed.vma_end = vma->vm_end - 1;
|
||||
|
||||
uvm_mutex_lock(&va_space->hmm.blocks_lock);
|
||||
tree_node = uvm_range_tree_find(&va_space->hmm.blocks, params->lookup_address);
|
||||
if (!tree_node) {
|
||||
UVM_ASSERT(uvm_range_tree_find_hole_in(&va_space->hmm.blocks, params->lookup_address,
|
||||
&params->va_range_start, &params->va_range_end) == NV_OK);
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
va_block = hmm_va_block_from_node(tree_node);
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
params->va_range_start = va_block->start;
|
||||
params->va_range_end = va_block->end;
|
||||
|
||||
node = uvm_va_policy_node_find(va_block, params->lookup_address);
|
||||
if (node) {
|
||||
uvm_processor_id_t processor_id;
|
||||
|
||||
if (params->va_range_start < node->node.start)
|
||||
params->va_range_start = node->node.start;
|
||||
if (params->va_range_end > node->node.end)
|
||||
params->va_range_end = node->node.end;
|
||||
|
||||
params->read_duplication = node->policy.read_duplication;
|
||||
|
||||
if (!UVM_ID_IS_INVALID(node->policy.preferred_location))
|
||||
uvm_va_space_processor_uuid(va_space, &params->preferred_location, node->policy.preferred_location);
|
||||
|
||||
for_each_id_in_mask(processor_id, &node->policy.accessed_by)
|
||||
uvm_va_space_processor_uuid(va_space, &params->accessed_by[params->accessed_by_count++], processor_id);
|
||||
}
|
||||
else {
|
||||
uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree, params->lookup_address,
|
||||
&params->va_range_start, &params->va_range_end);
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is implemented
|
||||
// for VMAs other than anonymous private memory.
|
||||
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (!uvm_va_block_is_hmm(va_block))
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
|
||||
return !vma_is_anonymous(vma);
|
||||
}
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
@@ -65,6 +65,8 @@ typedef struct
|
||||
// Initialize HMM for the given va_space for testing.
|
||||
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
|
||||
// the UVM driver. Remove this when enough HMM functionality is implemented.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
|
||||
|
||||
// Destroy any HMM state for the given va_space.
|
||||
@@ -87,6 +89,10 @@ typedef struct
|
||||
//
|
||||
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
|
||||
// address 'addr' or the VMA does not have at least PROT_READ permission.
|
||||
// The caller is also responsible for checking that there is no UVM
|
||||
// va_range covering the given address before calling this function.
|
||||
// If va_block_context is not NULL, the VMA is cached in
|
||||
// va_block_context->hmm.vma.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
@@ -94,23 +100,53 @@ typedef struct
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find the VMA for the given address and set va_block_context->hmm.vma.
|
||||
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
|
||||
// is no VMA associated with the address 'addr' or the VMA does not have at
|
||||
// least PROT_READ permission.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read or mm equal to NULL.
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
|
||||
|
||||
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
|
||||
// not NULL and covers the given region. This always returns true and is
|
||||
// intended to only be used with UVM_ASSERT().
|
||||
// Locking: This function must be called with the va_block lock held and if
|
||||
// va_block is a HMM block, va_block_context->mm must be retained and
|
||||
// locked for at least read.
|
||||
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Find or create a HMM va_block and mark it so the next va_block split
|
||||
// will fail for testing purposes.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr);
|
||||
|
||||
// Reclaim any HMM va_blocks that overlap the given range.
|
||||
// Note that 'end' is inclusive.
|
||||
// A HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
|
||||
// See uvm_hmm_vma_is_valid() for details.
|
||||
// Note that 'end' is inclusive. If mm is NULL, any HMM va_block in the
|
||||
// range will be reclaimed which assumes that the mm is being torn down
|
||||
// and was not retained.
|
||||
// Return values:
|
||||
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
|
||||
// NV_OK: There were no HMM blocks in the range, or all HMM
|
||||
// blocks in the range were successfully reclaimed.
|
||||
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
|
||||
// must hold a reference on it, and it must be locked for at least read
|
||||
// mode. Also, the va_space lock must be held in write mode.
|
||||
// must retain it with uvm_va_space_mm_or_current_retain() or be sure that
|
||||
// mm->mm_users is not zero, and it must be locked for at least read mode.
|
||||
// Also, the va_space lock must be held in write mode.
|
||||
// TODO: Bug 3372166: add asynchronous va_block reclaim.
|
||||
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 start,
|
||||
NvU64 end);
|
||||
|
||||
// This is called to update the va_space tree of HMM va_blocks after an
|
||||
// existing va_block is split.
|
||||
// Locking: the va_space lock must be held in write mode.
|
||||
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block);
|
||||
|
||||
// Find a HMM policy range that needs to be split. The callback function
|
||||
// 'split_needed_cb' returns true if the policy range needs to be split.
|
||||
// If a policy range is split, the existing range is truncated to
|
||||
@@ -148,7 +184,7 @@ typedef struct
|
||||
// Note that 'last_address' is inclusive.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
// TODO: Bug 2046423: need to implement read duplication support in Linux.
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
|
||||
uvm_read_duplication_policy_t new_policy,
|
||||
NvU64 base,
|
||||
@@ -159,10 +195,11 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Set va_block_context->policy to the policy covering the given address
|
||||
// 'addr' and update the ending address '*endp' to the minimum of *endp,
|
||||
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
|
||||
// policy range.
|
||||
// This function assigns va_block_context->policy to the policy covering
|
||||
// the given address 'addr' and assigns the ending address '*endp' to the
|
||||
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
|
||||
// ending address of the policy range. Note that va_block_context->hmm.vma
|
||||
// is expected to be initialized before calling this function.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
|
||||
// the va_block lock held.
|
||||
@@ -171,11 +208,11 @@ typedef struct
|
||||
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
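Per the revised comment above, uvm_hmm_find_policy_end() sets va_block_context->policy to the policy covering addr and writes back an end clipped to the block, the VMA, and the policy extent. A hypothetical sketch of walking a va_block in policy-sized chunks with it (assumes the VMA was cached in the context beforehand and the va_block lock is held, per the locking comment):

```c
// Hypothetical usage: visit [addr, end] chunks of the block, each covered by
// a single policy; ctx->policy is valid for the chunk after each call.
static void example_walk_policies(uvm_va_block_t *va_block, uvm_va_block_context_t *ctx)
{
    NvU64 addr = va_block->start;

    while (addr <= va_block->end) {
        NvU64 end = va_block->end;

        uvm_hmm_find_policy_end(va_block, ctx, addr, &end);

        // ... operate on [addr, end] under ctx->policy ...

        addr = end + 1;
    }
}
```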
|
||||
// Find the VMA for the page index 'page_index',
|
||||
// set va_block_context->policy to the policy covering the given address,
|
||||
// and update the ending page range '*outerp' to the minimum of *outerp,
|
||||
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
|
||||
// policy range.
|
||||
// This function finds the VMA for the page index 'page_index' and assigns
|
||||
// it to va_block_context->vma, sets va_block_context->policy to the policy
|
||||
// covering the given address, and sets the ending page range '*outerp'
|
||||
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
|
||||
// ending address of the policy range, and va_block->end.
|
||||
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
|
||||
@@ -185,6 +222,52 @@ typedef struct
|
||||
uvm_page_index_t page_index,
|
||||
uvm_page_index_t *outerp);
|
||||
|
||||
// Clear thrashing policy information from all HMM va_blocks.
|
||||
// Locking: va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space);
|
||||
|
||||
// Return the expanded region around 'address' limited to the intersection
|
||||
// of va_block start/end, vma start/end, and policy start/end.
|
||||
// va_block_context must not be NULL, va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
|
||||
// va_block_context->policy must be valid.
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
|
||||
// va_space lock must be held in at least read mode, and the va_block lock
|
||||
// held.
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address);
|
||||
|
||||
// Return the logical protection allowed of a HMM va_block for the page at
|
||||
// the given address.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()).
|
||||
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
|
||||
// read mode.
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr);
|
||||
|
||||
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp);
|
||||
|
||||
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params);
|
||||
|
||||
// Return true if GPU fault new residency location should be system memory.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()).
|
||||
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is
|
||||
// implemented for VMAs other than anonymous memory.
|
||||
// Locking: the va_block lock must be held. If the va_block is a HMM
|
||||
// va_block, the va_block_context->mm must be retained and locked for at least
|
||||
// read.
|
||||
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context);
|
||||
|
||||
#else // UVM_IS_CONFIG_HMM()
|
||||
|
||||
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
|
||||
@@ -226,6 +309,23 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 start,
|
||||
@@ -234,6 +334,10 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_policy_is_split_needed_t split_needed_cb,
|
||||
@@ -282,6 +386,49 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address)
|
||||
{
|
||||
return (uvm_va_block_region_t){};
|
||||
}
|
||||
|
||||
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr)
|
||||
{
|
||||
return UVM_PROT_NONE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
|
||||
{
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp)
|
||||
{
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
#endif // _UVM_HMM_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2021-2022 NVIDIA Corporation

     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -35,7 +35,7 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
     uvm_va_block_t *hmm_block = NULL;
     NV_STATUS status;

-    mm = uvm_va_space_mm_retain(va_space);
+    mm = uvm_va_space_mm_or_current_retain(va_space);
     if (!mm)
         return NV_WARN_NOTHING_TO_DO;

@@ -61,7 +61,7 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
     status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
     TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);

-    // Try to create an HMM va_block which overlaps a UVM managed block.
+    // Try to create an HMM va_block which overlaps a managed block.
     // It should fail.
     status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
     TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);

@@ -77,14 +77,14 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
 done:
     uvm_va_space_up_read(va_space);
     uvm_up_read_mmap_lock(mm);
-    uvm_va_space_mm_release(va_space);
+    uvm_va_space_mm_or_current_release(va_space, mm);

     return status;

 out:
     uvm_va_space_up_write(va_space);
     uvm_up_write_mmap_lock(mm);
-    uvm_va_space_mm_release(va_space);
+    uvm_va_space_mm_or_current_release(va_space, mm);

     return status;
 }
kernel-open/nvidia-uvm/uvm_hopper.c (new file, 96 lines)
@@ -0,0 +1,96 @@
/*******************************************************************************
    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_hopper_fault_buffer.h"

void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
    parent_gpu->tlb_batch.va_invalidate_supported = true;

    parent_gpu->tlb_batch.va_range_invalidate_supported = true;

    // TODO: Bug 1767241: Run benchmarks to figure out a good number
    parent_gpu->tlb_batch.max_ranges = 8;

    parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
                   (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Hopper covers 64 PB and that's the minimum
    // size that can be used.
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 64ull * 1024 * 1024 * 1024 * 1024 * 1024;

    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384ull * 1024 * 1024 * 1024 * 1024;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

    // All GR context buffers may be mapped to 57b wide VAs. All "compute" units
    // accessing GR context buffers support the 57-bit VA range.
    parent_gpu->max_channel_va = 1ull << 57;

    parent_gpu->max_host_va = 1ull << 57;

    // Hopper can map sysmem with any page size
    parent_gpu->can_map_sysmem_with_large_pages = true;

    // Prefetch instructions will generate faults
    parent_gpu->prefetch_fault_supported = true;

    // Hopper can place GPFIFO in vidmem
    parent_gpu->gpfifo_in_vidmem_supported = true;

    parent_gpu->replayable_faults_supported = true;

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

    parent_gpu->has_clear_faulted_channel_sw_method = true;

    parent_gpu->has_clear_faulted_channel_method = false;

    parent_gpu->smc.supported = true;

    parent_gpu->sparse_mappings_supported = true;

    parent_gpu->map_remap_larger_page_promotion = false;

    parent_gpu->plc_supported = true;
}
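
The VA carve-out set up above can be sanity-checked with plain integer arithmetic: the 64 PB RM region plus the 384 TB offset for UVM internal mappings still sits well below the 57-bit (128 PB) limit advertised through `max_channel_va`/`max_host_va`. A minimal standalone sketch of the same numbers (the variable names are illustrative, not from the driver):

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    const uint64_t rm_va_size      = 64ull << 50;                  // 64 PB: one top-level PDE
    const uint64_t uvm_mem_va_base = rm_va_size + (384ull << 40);  // 64 PB + 384 TB
    const uint64_t va_limit        = 1ull << 57;                   // 128 PB of addressable VA

    // Same value the driver computes with 64ull * 1024^5.
    assert(rm_va_size == 64ull * 1024 * 1024 * 1024 * 1024 * 1024);
    // UVM's internal mappings still fit inside the 57-bit VA space.
    assert(uvm_mem_va_base < va_limit);
    return 0;
}
```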
kernel-open/nvidia-uvm/uvm_hopper_ce.c (new file, 339 lines)
@@ -0,0 +1,339 @@
/*******************************************************************************
    Copyright (c) 2020 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_hal.h"
#include "uvm_push.h"
#include "clc8b5.h"

static void hopper_membar_after_transfer(uvm_push_t *push)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
        return;

    // TODO: [UVM-Volta] Remove Host WFI + Membar WAR for CE flush-only bug
    // http://nvbugs/1734761
    gpu->parent->host_hal->wait_for_idle(push);

    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
        gpu->parent->host_hal->membar_gpu(push);
    else
        gpu->parent->host_hal->membar_sys(push);
}

static NvU32 ce_aperture(uvm_aperture_t aperture)
{
    BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
                 HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
    BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
                 HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
    BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
                 HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));

    if (aperture == UVM_APERTURE_SYS) {
        return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
    }
    else if (aperture == UVM_APERTURE_VID) {
        return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
    }
    else {
        return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
               HWVALUE(C8B5, SET_SRC_PHYS_MODE, FLA, 0) |
               HWVALUE(C8B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
    }
}

void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
    NV_PUSH_2U(C8B5, OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
                     OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}

void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out)
{
    NV_PUSH_4U(C8B5, OFFSET_IN_UPPER,  HWVALUE(C8B5, OFFSET_IN_UPPER, UPPER, NvOffset_HI32(offset_in)),
                     OFFSET_IN_LOWER,  HWVALUE(C8B5, OFFSET_IN_LOWER, VALUE, NvOffset_LO32(offset_in)),
                     OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
                     OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}

// Perform an appropriate membar before a semaphore operation. Returns whether
// the semaphore operation should include a flush.
static bool hopper_membar_before_semaphore(uvm_push_t *push)
{
    uvm_gpu_t *gpu;

    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
        // No MEMBAR requested, don't use a flush.
        return false;
    }

    if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
        // By default do a MEMBAR SYS and for that we can just use flush on the
        // semaphore operation.
        return true;
    }

    // TODO: Bug 1734761: Remove the HOST WFI+membar WAR, i.e, perform the CE
    // flush when MEMBAR GPU is requested.
    gpu = uvm_push_get_gpu(push);
    gpu->parent->host_hal->wait_for_idle(push);
    gpu->parent->host_hal->membar_gpu(push);

    return false;
}
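
The helper above encodes a three-way decision that the semaphore paths rely on: no membar requested means no flush, the default (SYS) case is folded into the flush bit of the semaphore release itself, and a GPU-scoped membar is pushed explicitly as Host WFI plus MEMBAR_GPU because of the CE flush workaround mentioned in the TODO. A compact, purely illustrative restatement of that decision (not driver code):

```c
#include <assert.h>
#include <stdbool.h>

typedef enum { EXAMPLE_MEMBAR_NONE, EXAMPLE_MEMBAR_GPU, EXAMPLE_MEMBAR_SYS } example_membar_t;

// Mirrors the decision in hopper_membar_before_semaphore(): only the default
// SYS membar is expressed through the semaphore FLUSH_ENABLE bit; the GPU
// membar is pushed explicitly instead (WFI + membar_gpu).
static bool semaphore_needs_flush(example_membar_t requested)
{
    if (requested == EXAMPLE_MEMBAR_NONE)
        return false;
    if (requested == EXAMPLE_MEMBAR_SYS)
        return true;
    return false; // EXAMPLE_MEMBAR_GPU: explicit WFI + membar_gpu, no flush
}

int main(void)
{
    assert(!semaphore_needs_flush(EXAMPLE_MEMBAR_NONE));
    assert(semaphore_needs_flush(EXAMPLE_MEMBAR_SYS));
    assert(!semaphore_needs_flush(EXAMPLE_MEMBAR_GPU));
    return 0;
}
```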
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 flush_value;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = hopper_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
else
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
SET_SEMAPHORE_PAYLOAD, payload);
|
||||
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
|
||||
launch_dma_plc_mode);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 flush_value;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = hopper_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
else
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
SET_SEMAPHORE_PAYLOAD, payload);
|
||||
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_SIGN, UNSIGNED) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_ENABLE, TRUE) |
|
||||
launch_dma_plc_mode);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 flush_value;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = hopper_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
else
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);
|
||||
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE) |
|
||||
launch_dma_plc_mode);
|
||||
}

static NvU32 hopper_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
{
    if (dst.is_virtual)
        return HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);

    NV_PUSH_1U(C8B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
    return HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
}

static bool hopper_scrub_enable(uvm_gpu_address_t dst, size_t size)
{
    return !dst.is_virtual &&
           dst.aperture == UVM_APERTURE_VID &&
           IS_ALIGNED(dst.address, UVM_PAGE_SIZE_4K) &&
           IS_ALIGNED(size, UVM_PAGE_SIZE_4K);
}

static void hopper_memset_common(uvm_push_t *push,
                                 uvm_gpu_address_t dst,
                                 size_t num_elements,
                                 size_t memset_element_size)
{
    // If >4GB memsets ever become an important use case, this function should
    // use multi-line transfers so we don't have to iterate (bug 1766588).
    static const size_t max_single_memset = 0xFFFFFFFF;

    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU32 pipelined_value;
    NvU32 launch_dma_dst_type;
    NvU32 launch_dma_plc_mode;
    NvU32 launch_dma_remap_enable;
    NvU32 launch_dma_scrub_enable;

    UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
                   "Memset validation failed in channel %s, GPU %s",
                   push->channel->name,
                   uvm_gpu_name(gpu));

    launch_dma_dst_type = hopper_memset_push_phys_mode(push, dst);
    launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
    else
        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);

    if (memset_element_size == 8 && hopper_scrub_enable(dst, num_elements * memset_element_size)) {
        launch_dma_remap_enable = HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, FALSE);
        launch_dma_scrub_enable = HWCONST(C8B5, LAUNCH_DMA, MEMORY_SCRUB_ENABLE, TRUE);

        NV_PUSH_1U(C8B5, SET_MEMORY_SCRUB_PARAMETERS,
                   HWCONST(C8B5, SET_MEMORY_SCRUB_PARAMETERS, DISCARDABLE, FALSE));

        // Scrub requires disabling remap, and with remap disabled the element
        // size is 1.
        num_elements *= memset_element_size;
        memset_element_size = 1;
    }
    else {
        launch_dma_remap_enable = HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, TRUE);
        launch_dma_scrub_enable = HWCONST(C8B5, LAUNCH_DMA, MEMORY_SCRUB_ENABLE, FALSE);
    }

    do {
        NvU32 memset_this_time = (NvU32)min(num_elements, max_single_memset);

        gpu->parent->ce_hal->offset_out(push, dst.address);

        NV_PUSH_1U(C8B5, LINE_LENGTH_IN, memset_this_time);

        NV_PUSH_1U(C8B5, LAUNCH_DMA,
                   HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
                   HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
                   HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
                   HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
                   launch_dma_remap_enable |
                   launch_dma_scrub_enable |
                   launch_dma_dst_type |
                   launch_dma_plc_mode |
                   pipelined_value);

        dst.address += memset_this_time * memset_element_size;
        num_elements -= memset_this_time;
        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
    } while (num_elements > 0);

    hopper_membar_after_transfer(push);
}
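
The chunking described in the comment above caps each LINE_LENGTH_IN transfer at 0xFFFFFFFF elements and simply loops. A small host-side model of that loop, purely illustrative:

```c
#include <stdint.h>
#include <stdio.h>

// Model of the loop in hopper_memset_common(): a memset of num_elements is
// split into pushes of at most 2^32 - 1 elements each.
static unsigned chunk_count(uint64_t num_elements)
{
    const uint64_t max_single_memset = 0xFFFFFFFFull;
    unsigned chunks = 0;

    do {
        uint64_t this_time = num_elements < max_single_memset ? num_elements : max_single_memset;
        num_elements -= this_time;
        chunks++;
    } while (num_elements > 0);

    return chunks;
}

int main(void)
{
    // An 8 GB scrub with 1-byte elements needs 3 pushes, because each push
    // moves at most 2^32 - 1 elements (two nearly-full chunks plus 2 bytes).
    printf("%u\n", chunk_count(8ull * 1024 * 1024 * 1024)); // prints 3
    return 0;
}
```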

void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
{
    UVM_ASSERT_MSG(size % 8 == 0, "size: %zd\n", size);

    size /= 8;

    NV_PUSH_3U(C8B5, SET_REMAP_CONST_A, (NvU32)value,
                     SET_REMAP_CONST_B, (NvU32)(value >> 32),
                     SET_REMAP_COMPONENTS,
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO));

    hopper_memset_common(push, dst, size, 8);
}

void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size)
{
    if (hopper_scrub_enable(dst, size)) {
        NvU64 value64 = value;

        value64 |= value64 << 8;
        value64 |= value64 << 16;
        value64 |= value64 << 32;

        uvm_hal_hopper_ce_memset_8(push, dst, value64, size);
        return;
    }

    NV_PUSH_2U(C8B5, SET_REMAP_CONST_B, (NvU32)value,
                     SET_REMAP_COMPONENTS,
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, ONE) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));

    hopper_memset_common(push, dst, size, 1);
}

void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size)
{
    UVM_ASSERT_MSG(size % 4 == 0, "size: %zd\n", size);

    if (hopper_scrub_enable(dst, size)) {
        NvU64 value64 = value;

        value64 |= value64 << 32;

        uvm_hal_hopper_ce_memset_8(push, dst, value64, size);
        return;
    }

    size /= 4;

    NV_PUSH_2U(C8B5, SET_REMAP_CONST_B, value,
                     SET_REMAP_COMPONENTS,
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
                     HWCONST(C8B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));

    hopper_memset_common(push, dst, size, 4);
}
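
The widening in memset_1/memset_4 above is the usual replicate-then-memset_8 trick: an 8- or 32-bit pattern is broadcast into a 64-bit value so the scrub path can run with 8-byte elements. A standalone sketch of the same arithmetic (illustrative only, not driver code):

```c
#include <assert.h>
#include <stdint.h>

static uint64_t replicate_u8(uint8_t value)
{
    uint64_t v = value;
    v |= v << 8;   // 2 copies of the byte
    v |= v << 16;  // 4 copies
    v |= v << 32;  // 8 copies
    return v;
}

static uint64_t replicate_u32(uint32_t value)
{
    uint64_t v = value;
    return v | (v << 32); // 2 copies of the word
}

int main(void)
{
    assert(replicate_u8(0xAB)        == 0xABABABABABABABABull);
    assert(replicate_u32(0xDEADBEEF) == 0xDEADBEEFDEADBEEFull);
    return 0;
}
```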
kernel-open/nvidia-uvm/uvm_hopper_fault_buffer.c (new file, 42 lines)
@@ -0,0 +1,42 @@
/*******************************************************************************
    Copyright (c) 2020 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_hal_types.h"
#include "hwref/hopper/gh100/dev_fault.h"

// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
// ... to validate the ve_id
#define MAX_SUBCONTEXTS 64
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
{
    // Only graphics engines can generate MMU faults from different subcontexts
    if (mmu_engine_type == UVM_MMU_ENGINE_TYPE_GRAPHICS) {
        NvU16 ve_id = mmu_engine_id - NV_PFAULT_MMU_ENG_ID_GRAPHICS;
        UVM_ASSERT(ve_id < MAX_SUBCONTEXTS);

        return (NvU8)ve_id;
    }
    else {
        return 0;
    }
}
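
The ve_id computation above is just an offset into the graphics MMU engine ID space, clamped by the subcontext limit. A self-contained illustration of the same arithmetic; the base value below is made up for the example, the real one comes from `hwref/hopper/gh100/dev_fault.h`:

```c
#include <assert.h>
#include <stdint.h>

// Illustrative placeholder, not the real NV_PFAULT_MMU_ENG_ID_GRAPHICS value.
#define EXAMPLE_MMU_ENG_ID_GRAPHICS 384u
#define EXAMPLE_MAX_SUBCONTEXTS     64u

static uint8_t example_get_ve_id(uint16_t mmu_engine_id, int is_graphics)
{
    if (is_graphics) {
        uint16_t ve_id = mmu_engine_id - EXAMPLE_MMU_ENG_ID_GRAPHICS;
        assert(ve_id < EXAMPLE_MAX_SUBCONTEXTS);
        return (uint8_t)ve_id;
    }
    // Non-graphics engines always report subcontext 0.
    return 0;
}

int main(void)
{
    assert(example_get_ve_id(EXAMPLE_MMU_ENG_ID_GRAPHICS + 5, 1) == 5);
    assert(example_get_ve_id(123, 0) == 0);
    return 0;
}
```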
kernel-open/nvidia-uvm/uvm_hopper_fault_buffer.h (new file, 90 lines)
@@ -0,0 +1,90 @@
/*******************************************************************************
    Copyright (c) 2020 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_HAL_HOPPER_FAULT_BUFFER_H__
#define __UVM_HAL_HOPPER_FAULT_BUFFER_H__

#include "nvtypes.h"
#include "uvm_common.h"
#include "uvm_gpu.h"

// There are up to 9 TPCs per GPC in Hopper, and there are 2 LTP uTLBs per TPC.
// Besides, there is one active RGG uTLB per GPC. Each TPC has a number of
// clients that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests
// from these units are routed as follows to the 2 LTP uTLBs:
//
//    --------                      ---------
//    | T1_0 | -------------------> | uTLB0 |
//    --------                      ---------
//
//    --------                      ---------
//    | T1_1 | -------------------> | uTLB1 |
//    --------                 ---> ---------
//                             |        ^
//    -------                  |        |
//    | PE  | ------------------        |
//    -------                           |
//                                      |
//    ---------                         |
//    | TPCCS | ------------------------
//    ---------
//
//
// The client ids are local to their GPC and the id mapping is linear across
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
//
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
// be ignored. These will never be reported in a fault message, and should
// never be used in an invalidate. Therefore, we define our own values.
typedef enum {
    UVM_HOPPER_GPC_UTLB_ID_RGG   = 0,
    UVM_HOPPER_GPC_UTLB_ID_LTP0  = 1,
    UVM_HOPPER_GPC_UTLB_ID_LTP1  = 2,
    UVM_HOPPER_GPC_UTLB_ID_LTP2  = 3,
    UVM_HOPPER_GPC_UTLB_ID_LTP3  = 4,
    UVM_HOPPER_GPC_UTLB_ID_LTP4  = 5,
    UVM_HOPPER_GPC_UTLB_ID_LTP5  = 6,
    UVM_HOPPER_GPC_UTLB_ID_LTP6  = 7,
    UVM_HOPPER_GPC_UTLB_ID_LTP7  = 8,
    UVM_HOPPER_GPC_UTLB_ID_LTP8  = 9,
    UVM_HOPPER_GPC_UTLB_ID_LTP9  = 10,
    UVM_HOPPER_GPC_UTLB_ID_LTP10 = 11,
    UVM_HOPPER_GPC_UTLB_ID_LTP11 = 12,
    UVM_HOPPER_GPC_UTLB_ID_LTP12 = 13,
    UVM_HOPPER_GPC_UTLB_ID_LTP13 = 14,
    UVM_HOPPER_GPC_UTLB_ID_LTP14 = 15,
    UVM_HOPPER_GPC_UTLB_ID_LTP15 = 16,
    UVM_HOPPER_GPC_UTLB_ID_LTP16 = 17,
    UVM_HOPPER_GPC_UTLB_ID_LTP17 = 18,

    UVM_HOPPER_GPC_UTLB_COUNT,
} uvm_hopper_gpc_utlb_id_t;

static NvU32 uvm_hopper_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
    UVM_ASSERT(utlbs <= UVM_HOPPER_GPC_UTLB_COUNT);
    return utlbs;
}

#endif
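
The enum and the helper above agree on the bookkeeping: with the maximum of 9 TPCs per GPC, 2 LTP uTLBs per TPC plus the single RGG uTLB gives 19 entries, which is exactly `UVM_HOPPER_GPC_UTLB_COUNT`. A tiny sanity check of that arithmetic (illustrative, not driver code):

```c
#include <assert.h>

#define EXAMPLE_HOPPER_GPC_UTLB_COUNT 19u  // RGG + LTP0..LTP17

static unsigned utlbs_per_gpc(unsigned max_tpc_per_gpc)
{
    // 2 LTP uTLBs per TPC, plus 1 RGG uTLB per GPC.
    return max_tpc_per_gpc * 2 + 1;
}

int main(void)
{
    // A full GH100 GPC with 9 TPCs: 18 LTP uTLBs + 1 RGG uTLB.
    assert(utlbs_per_gpc(9) == EXAMPLE_HOPPER_GPC_UTLB_COUNT);
    return 0;
}
```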
kernel-open/nvidia-uvm/uvm_hopper_host.c (new file, 412 lines)
@@ -0,0 +1,412 @@
/*******************************************************************************
    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_push_macros.h"
#include "clc86f.h"

// TODO: Bug 3210931: Rename HOST references and files to ESCHED.

void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
|
||||
|
||||
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
|
||||
|
||||
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
|
||||
SEM_ADDR_HI, HWVALUE(C86F, SEM_ADDR_HI, OFFSET, NvOffset_HI32(gpu_va)),
|
||||
SEM_PAYLOAD_LO, payload,
|
||||
SEM_PAYLOAD_HI, 0,
|
||||
SEM_EXECUTE, HWCONST(C86F, SEM_EXECUTE, OPERATION, RELEASE) |
|
||||
HWCONST(C86F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
|
||||
HWCONST(C86F, SEM_EXECUTE, RELEASE_TIMESTAMP, DIS) |
|
||||
HWCONST(C86F, SEM_EXECUTE, RELEASE_WFI, DIS));
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
|
||||
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
|
||||
SEM_ADDR_HI, HWVALUE(C86F, SEM_ADDR_HI, OFFSET, NvOffset_HI32(gpu_va)),
|
||||
SEM_PAYLOAD_LO, payload,
|
||||
SEM_PAYLOAD_HI, 0,
|
||||
SEM_EXECUTE, HWCONST(C86F, SEM_EXECUTE, OPERATION, ACQ_CIRC_GEQ) |
|
||||
HWCONST(C86F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
|
||||
HWCONST(C86F, SEM_EXECUTE, ACQUIRE_SWITCH_TSG, EN));
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
|
||||
|
||||
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
|
||||
|
||||
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
|
||||
SEM_ADDR_HI, HWVALUE(C86F, SEM_ADDR_HI, OFFSET, NvOffset_HI32(gpu_va)),
|
||||
SEM_PAYLOAD_LO, 0xdeadbeef,
|
||||
SEM_PAYLOAD_HI, 0,
|
||||
SEM_EXECUTE, HWCONST(C86F, SEM_EXECUTE, OPERATION, RELEASE) |
|
||||
HWCONST(C86F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
|
||||
HWCONST(C86F, SEM_EXECUTE, RELEASE_TIMESTAMP, EN) |
|
||||
HWCONST(C86F, SEM_EXECUTE, RELEASE_WFI, DIS));
|
||||
}
|
||||
|
||||
static NvU32 fault_cancel_va_mode_to_cancel_access_type(uvm_fault_cancel_va_mode_t cancel_va_mode)
|
||||
{
|
||||
// There are only two logical cases from the perspective of UVM. Accesses to
|
||||
// an invalid address, which will cancel all accesses on the page, and
|
||||
// accesses with an invalid type on a read-only page, which will cancel all
|
||||
// write/atomic accesses on the page.
|
||||
switch (cancel_va_mode)
|
||||
{
|
||||
case UVM_FAULT_CANCEL_VA_MODE_ALL:
|
||||
return HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACCESS_TYPE, VIRT_ALL);
|
||||
case UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC:
|
||||
return HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACCESS_TYPE, VIRT_WRITE_AND_ATOMIC);
|
||||
default:
|
||||
UVM_ASSERT_MSG(false, "Invalid cancel_va_mode %d\n", cancel_va_mode);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 addr_lo;
|
||||
NvU32 addr_hi;
|
||||
NvU32 access_type_value;
|
||||
NvU64 addr = fault_entry->fault_address;
|
||||
NvU32 mmu_engine_id = fault_entry->fault_source.mmu_engine_id;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx not aligned to 4KB\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
access_type_value = fault_cancel_va_mode_to_cancel_access_type(cancel_va_mode);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(addr, 1 << 12), "addr 0x%llx not aligned to 4KB\n", addr);
|
||||
addr >>= 12;
|
||||
|
||||
addr_lo = addr & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
addr_hi = addr >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, addr_lo) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID, mmu_engine_id),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, addr_hi),
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, CANCEL_VA_GLOBAL) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, NONE) |
|
||||
access_type_value |
|
||||
aperture_value,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE4 is the highest level on Hopper, see the comment in uvm_hopper_mmu.c
|
||||
// for details.
|
||||
UVM_ASSERT_MSG(depth < NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
|
||||
page_table_level = NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
|
||||
|
||||
if (membar != UVM_MEMBAR_NONE) {
|
||||
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
|
||||
// GLOBALLY to make sure all the pending accesses can be picked up by
|
||||
// the membar.
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
NvU64 actual_base;
|
||||
NvU64 actual_size;
|
||||
NvU64 actual_end;
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
// the passed interval
|
||||
end = base + size - 1;
|
||||
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
|
||||
|
||||
if (log2_invalidation_size == 64) {
|
||||
// Invalidate everything
|
||||
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
|
||||
return;
|
||||
}
|
||||
|
||||
// The hardware aligns the target address down to the invalidation size.
|
||||
actual_size = 1ULL << log2_invalidation_size;
|
||||
actual_base = UVM_ALIGN_DOWN(base, actual_size);
|
||||
actual_end = actual_base + actual_size - 1;
|
||||
UVM_ASSERT(actual_end >= end);
|
||||
|
||||
// The invalidation size field expects log2(invalidation size in 4K), not
|
||||
// log2(invalidation size in bytes)
|
||||
log2_invalidation_size -= 12;
|
||||
|
||||
// Address to invalidate, as a multiple of 4K.
|
||||
base >>= 12;
|
||||
va_lo = base & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
va_hi = base >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE4 is the highest level on Hopper, see the comment in uvm_hopper_mmu.c
|
||||
// for details.
|
||||
UVM_ASSERT_MSG(depth < NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
|
||||
page_table_level = NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
|
||||
|
||||
if (membar != UVM_MEMBAR_NONE) {
|
||||
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
|
||||
// GLOBALLY to make sure all the pending accesses can be picked up by
|
||||
// the membar.
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
}
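
The power-of-two sizing used in uvm_hal_hopper_host_tlb_invalidate_va above (`log2_invalidation_size = __fls(end ^ base) + 1`) picks the smallest naturally aligned power-of-two region that contains [base, end]. A user-space model of the same computation, using a portable compiler builtin in place of the kernel's `__fls()` (illustrative only):

```c
#include <assert.h>
#include <stdint.h>

// Log2 of the smallest naturally aligned power-of-two region covering
// [base, base + size - 1]. Requires size > 0 and end != base, which holds in
// the driver because base and size are page aligned. The driver then
// subtracts 12 (the field expects log2 of the size in 4 KB units) and falls
// back to a full invalidate when the result is 64.
static unsigned log2_covering_size(uint64_t base, uint64_t size)
{
    uint64_t end = base + size - 1;
    // Highest bit where base and end differ decides the region size.
    return 64 - __builtin_clzll(end ^ base);
}

int main(void)
{
    // [0x10000, 0x2ffff] differs from its base at bit 17, so the covering
    // region is 2^18 bytes (256 KB) aligned down to a 256 KB boundary.
    assert(log2_covering_size(0x10000, 0x20000) == 18);

    // A single 4 KB page starting on a 2 MB boundary only needs a 4 KB region.
    assert(log2_covering_size(0x200000, 0x1000) == 12);
    return 0;
}
```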
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
uvm_membar_t membar;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
|
||||
// PDE4 is the highest level on Hopper, see the comment in
|
||||
// uvm_hopper_mmu.c for details.
|
||||
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde4, params->page_table_level) - 1;
|
||||
}
|
||||
|
||||
if (params->membar != UvmInvalidateTlbMemBarNone) {
|
||||
// If a GPU or SYS membar is needed, ack_value needs to be set to
|
||||
// GLOBALLY to make sure all the pending accesses can be picked up by
|
||||
// the membar.
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
|
||||
|
||||
if (params->target_va_mode == UvmTargetVaModeTargeted) {
|
||||
NvU64 va = params->va >> 12;
|
||||
|
||||
NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
invalidate_gpc_value |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
invalidate_gpc_value |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
membar = UVM_MEMBAR_SYS;
|
||||
else if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
membar = UVM_MEMBAR_GPU;
|
||||
else
|
||||
membar = UVM_MEMBAR_NONE;
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)
{
    UVM_ASSERT(!uvm_global_is_suspended());
    UVM_ASSERT((pushbuffer_va >> 40) == ((NvU64)(pushbuffer_va + UVM_PUSHBUFFER_SIZE) >> 40));

    *fifo_entry = HWVALUE(C86F, GP_ENTRY0, PB_EXTENDED_BASE_OPERAND, (pushbuffer_va >> 40));
    *fifo_entry |= (NvU64)(HWCONST(C86F, GP_ENTRY1, OPCODE, SET_PB_SEGMENT_EXTENDED_BASE)) << 32;
}

void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length)
{
    NvU64 fifo_entry_value;
    NvU64 pb_low_bits_mask = (1ull << 40) - 1;

    UVM_ASSERT(!uvm_global_is_suspended());
    UVM_ASSERT_MSG(IS_ALIGNED(pushbuffer_va, 4), "pushbuffer va unaligned: %llu\n", pushbuffer_va);
    UVM_ASSERT_MSG(IS_ALIGNED(pushbuffer_length, 4), "pushbuffer length unaligned: %u\n", pushbuffer_length);

    // The PBDMA/Esched fetches the lower 40-bits of a pushbuffer VA from the
    // GPFIFO entry. The VA upper bits are stored internally by Esched and set
    // by uvm_channel_write_ctrl_gpfifo()/set_gpfifo_pushbuffer_segment_base().
    pushbuffer_va &= pb_low_bits_mask;
    fifo_entry_value = HWVALUE(C86F, GP_ENTRY0, GET, NvU64_LO32(pushbuffer_va) >> 2);
    fifo_entry_value |= (NvU64)(HWVALUE(C86F, GP_ENTRY1, GET_HI, NvU64_HI32(pushbuffer_va)) |
                                HWVALUE(C86F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2)) << 32;

    *fifo_entry = fifo_entry_value;
}

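
The split described in the comment above (Esched keeps the VA bits above bit 39, while each GPFIFO entry carries only the low 40 bits) can be modeled with plain shifts and masks. The sketch below is illustrative only and does not use the real GP_ENTRY field layout:

```c
#include <assert.h>
#include <stdint.h>

// Illustrative model: a pushbuffer VA is split into an extended base
// (bits 63:40, programmed once per segment via a control GPFIFO entry)
// and a 40-bit offset carried in every regular GPFIFO entry.
static void split_pushbuffer_va(uint64_t va, uint64_t *extended_base, uint64_t *low_40)
{
    const uint64_t low_mask = (1ull << 40) - 1;

    *extended_base = va >> 40;
    *low_40        = va & low_mask;
}

int main(void)
{
    uint64_t base, low;

    split_pushbuffer_va(0x123456789ABCull, &base, &low);
    assert(base == 0x12ull);           // bits 63:40
    assert(low  == 0x3456789ABCull);   // bits 39:0

    // Reassembling the two halves gives back the original VA.
    assert(((base << 40) | low) == 0x123456789ABCull);
    return 0;
}
```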
kernel-open/nvidia-uvm/uvm_hopper_mmu.c (new file, 501 lines)
@@ -0,0 +1,501 @@
/*******************************************************************************
    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

// For Hopper, the UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth   HW level                                VA bits
// 0           PDE4                                    56:56
// 1           PDE3                                    55:47
// 2           PDE2                                    46:38
// 3           PDE1 (or 512M PTE)                      37:29
// 4           PDE0 (dual 64k/4k PDE, or 2M PTE)       28:21
// 5           PTE_64K / PTE_4K                        20:16 / 20:12

#include "uvm_types.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "uvm_hopper_fault_buffer.h"
#include "hwref/hopper/gh100/dev_fault.h"
#include "hwref/hopper/gh100/dev_mmu.h"

#define MMU_BIG 0
#define MMU_SMALL 1

uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
{
    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
        return UVM_MMU_ENGINE_TYPE_HOST;

    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
        return UVM_MMU_ENGINE_TYPE_CE;

    // We shouldn't be servicing faults from any other engines
    UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);

    return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}

static NvU32 page_table_depth_hopper(NvU32 page_size)
{
    // The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
    if (page_size == UVM_PAGE_SIZE_2M)
        return 4;
    else if (page_size == UVM_PAGE_SIZE_512M)
        return 3;
    return 5;
}

static NvU32 entries_per_index_hopper(NvU32 depth)
{
    UVM_ASSERT(depth < 6);
    if (depth == 4)
        return 2;
    return 1;
}

static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
{
    UVM_ASSERT(depth < 6);
    if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
        return MMU_SMALL;
    return MMU_BIG;
}

static NvLength entry_size_hopper(NvU32 depth)
{
    return entries_per_index_hopper(depth) * 8;
}

static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
{
    static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};

    // some code paths keep on querying this until they get a 0, meaning only
    // the page offset remains.
    UVM_ASSERT(depth < 6);
    if (depth < 5) {
        return bit_widths[depth];
    }
    else if (depth == 5) {
        switch (page_size) {
            case UVM_PAGE_SIZE_4K:
                return 9;
            case UVM_PAGE_SIZE_64K:
                return 5;
            default:
                break;
        }
    }
    return 0;
}

static NvU32 num_va_bits_hopper(void)
{
    return 57;
}

static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
{
    UVM_ASSERT(depth < 6);
    if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
        return 256;

    // depth 0 requires only a 16-byte allocation, but it must be 4k aligned.
    return 4096;
}
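
The per-level index widths above add up exactly to the 57 VA bits reported by num_va_bits_hopper(): 1 + 9 + 9 + 9 + 8 for PDE4 through PDE0, plus 9 index bits and a 12-bit page offset for the 4 KB leaf (or 5 + 16 for the 64 KB leaf). A hedged user-space sketch of the same breakdown, extracting each level's index from a 57-bit VA for the 4 KB case (names are illustrative, not driver code):

```c
#include <assert.h>
#include <stdint.h>

// Index bit widths for depths 0..5 (PDE4..PDE0, then the 4 KB PTE level),
// matching the table and index_bits_hopper() above for 4 KB pages.
static const unsigned hopper_index_bits_4k[6] = {1, 9, 9, 9, 8, 9};

// Page-table index used at 'depth' for a 57-bit VA mapped with 4 KB pages.
static unsigned va_index_at_depth(uint64_t va, unsigned depth)
{
    unsigned shift = 12; // page offset
    for (unsigned d = 5; d > depth; d--)
        shift += hopper_index_bits_4k[d];
    return (unsigned)((va >> shift) & ((1u << hopper_index_bits_4k[depth]) - 1));
}

int main(void)
{
    unsigned total_bits = 12; // page offset
    for (unsigned d = 0; d < 6; d++)
        total_bits += hopper_index_bits_4k[d];
    assert(total_bits == 57); // matches num_va_bits_hopper()

    // Bit 56 selects one of the two PDE4 entries (depth 0).
    assert(va_index_at_depth(1ull << 56, 0) == 1);
    // Bits 20:12 are the 4 KB PTE index (depth 5).
    assert(va_index_at_depth(0x5000, 5) == 5);
    return 0;
}
```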
// PTE Permission Control Flags
|
||||
static NvU64 pte_pcf(uvm_prot_t prot, NvU64 flags)
|
||||
{
|
||||
bool ac = !(flags & UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED);
|
||||
bool cached = flags & UVM_MMU_PTE_FLAGS_CACHED;
|
||||
|
||||
UVM_ASSERT(prot != UVM_PROT_NONE);
|
||||
UVM_ASSERT((flags & ~UVM_MMU_PTE_FLAGS_MASK) == 0);
|
||||
|
||||
if (ac) {
|
||||
switch (prot) {
|
||||
case UVM_PROT_READ_ONLY:
|
||||
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE :
|
||||
NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE;
|
||||
case UVM_PROT_READ_WRITE:
|
||||
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE :
|
||||
NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE;
|
||||
case UVM_PROT_READ_WRITE_ATOMIC:
|
||||
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE :
|
||||
NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (prot) {
|
||||
case UVM_PROT_READ_ONLY:
|
||||
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD :
|
||||
NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD;
|
||||
case UVM_PROT_READ_WRITE:
|
||||
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD :
|
||||
NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD;
|
||||
case UVM_PROT_READ_WRITE_ATOMIC:
|
||||
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD :
|
||||
NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Unsupported PCF
|
||||
UVM_ASSERT_MSG(0, "Unsupported PTE PCF: prot: %s, ac: %d, cached: %d\n", uvm_prot_string(prot), ac, cached);
|
||||
|
||||
return NV_MMU_VER3_PTE_PCF_INVALID;
|
||||
}
|
||||
|
||||
static NvU64 make_pte_hopper(uvm_aperture_t aperture, NvU64 address, uvm_prot_t prot, NvU64 flags)
|
||||
{
|
||||
NvU8 aperture_bits = 0;
|
||||
NvU64 pte_bits = 0;
|
||||
|
||||
// valid 0:0
|
||||
pte_bits |= HWCONST64(_MMU_VER3, PTE, VALID, TRUE);
|
||||
|
||||
// aperture 2:1
|
||||
if (aperture == UVM_APERTURE_SYS)
|
||||
aperture_bits = NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
|
||||
else if (aperture == UVM_APERTURE_VID)
|
||||
aperture_bits = NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY;
|
||||
else if (aperture >= UVM_APERTURE_PEER_0 && aperture <= UVM_APERTURE_PEER_7)
|
||||
aperture_bits = NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY;
|
||||
else
|
||||
UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", aperture);
|
||||
|
||||
pte_bits |= HWVALUE64(_MMU_VER3, PTE, APERTURE, aperture_bits);
|
||||
|
||||
// PCF (permission control flags) 7:3
|
||||
pte_bits |= HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(prot, flags));
|
||||
|
||||
// kind 11:8
|
||||
pte_bits |= HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_GENERIC_MEMORY);
|
||||
|
||||
address >>= NV_MMU_VER3_PTE_ADDRESS_SHIFT;
|
||||
|
||||
if (aperture == UVM_APERTURE_VID) {
|
||||
// vid address 39:12
|
||||
pte_bits |= HWVALUE64(_MMU_VER3, PTE, ADDRESS_VID, address);
|
||||
}
|
||||
else {
|
||||
// sys/peer address 51:12
|
||||
pte_bits |= HWVALUE64(_MMU_VER3, PTE, ADDRESS, address);
|
||||
|
||||
// peer id 63:61
|
||||
if (aperture >= UVM_APERTURE_PEER_0 && aperture <= UVM_APERTURE_PEER_7)
|
||||
pte_bits |= HWVALUE64(_MMU_VER3, PTE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
|
||||
}
|
||||
|
||||
return pte_bits;
|
||||
}
|
||||
|
||||
static NvU64 make_sked_reflected_pte_hopper(void)
|
||||
{
|
||||
return HWCONST64(_MMU_VER3, PTE, VALID, TRUE) |
|
||||
HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(UVM_PROT_READ_WRITE_ATOMIC, UVM_MMU_PTE_FLAGS_NONE)) |
|
||||
HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
|
||||
}
|
||||
|
||||
static NvU64 make_sparse_pte_hopper(void)
|
||||
{
|
||||
return HWCONST64(_MMU_VER3, PTE, VALID, FALSE) |
|
||||
HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
|
||||
}
|
||||
|
||||
static NvU64 unmapped_pte_hopper(NvU32 page_size)
|
||||
{
|
||||
// Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
|
||||
// the corresponding 4k PTEs to be ignored. This allows the invalidation of
|
||||
// a mixed PDE range to be much faster.
|
||||
if (page_size != UVM_PAGE_SIZE_64K)
|
||||
return 0;
|
||||
|
||||
// When VALID == 0, GMMU still reads the PCF field, which indicates the PTE
|
||||
// is sparse (make_sparse_pte_hopper) or an unmapped big-page PTE.
|
||||
return HWCONST64(_MMU_VER3, PTE, VALID, FALSE) |
|
||||
HWCONST64(_MMU_VER3, PTE, PCF, NO_VALID_4KB_PAGE);
|
||||
}
|
||||
|
||||
static NvU64 poisoned_pte_hopper(void)
|
||||
{
|
||||
// An invalid PTE won't be fatal from faultable units like SM, which is the
|
||||
// most likely source of bad PTE accesses.
|
||||
|
||||
// Engines with priv accesses won't fault on the priv PTE, so add a backup
|
||||
// mechanism using an impossible memory address. MMU will trigger an
|
||||
// interrupt when it detects a bad physical address, i.e., a physical
|
||||
// address > GPU memory size.
|
||||
//
|
||||
// This address has to fit within 38 bits (max address width of vidmem) and
|
||||
// be aligned to page_size.
|
||||
NvU64 phys_addr = 0x2bad000000ULL;
|
||||
|
||||
NvU64 pte_bits = make_pte_hopper(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
|
||||
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
|
||||
}

static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
{
    NvU64 pde_bits = 0;

    if (phys_alloc != NULL) {
        NvU64 address = phys_alloc->addr.address >> NV_MMU_VER3_PDE_ADDRESS_SHIFT;
        pde_bits |= HWCONST64(_MMU_VER3, PDE, IS_PTE, FALSE);

        switch (phys_alloc->addr.aperture) {
            case UVM_APERTURE_SYS:
                pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, SYSTEM_COHERENT_MEMORY);
                break;
            case UVM_APERTURE_VID:
                pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, VIDEO_MEMORY);
                break;
            default:
                UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
                break;
        }

        // PCF (permission control flags) 5:3
        // Hopper GPUs on ATS-enabled systems perform a parallel lookup on both
        // ATS and GMMU page tables. For managed memory we need to prevent this
        // parallel lookup since we would not get any GPU fault if the CPU has
        // a valid mapping. Also, for external ranges that are known to be
        // mapped entirely on the GMMU page table we can skip the ATS lookup
        // for performance reasons. Parallel ATS lookup is disabled in PDE1
        // (depth 3) and, therefore, it applies to the underlying 512MB VA
        // range.
        //
        // UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
        // This is fine because CUDA ensures that all managed and external
        // allocations are properly compartmentalized in 512MB-aligned VA
        // regions. For cudaHostRegister CUDA cannot control the VA range, but
        // we rely on ATS for those allocations so they can't choose the
        // ATS_NOT_ALLOWED mode.
        //
        // TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
        // to PTEs.
        if (depth == 3 && g_uvm_global.ats.enabled)
            pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
        else
            pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);

        // address 51:12
        pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
    }

    return pde_bits;
}
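
// [Illustrative example, not part of the original file] Per the "address
// 51:12" comment above, the ADDRESS field stores the next-level table's
// physical address shifted right by NV_MMU_VER3_PDE_ADDRESS_SHIFT (4K
// granularity). For instance, assuming a next-level table allocated in vidmem
// at physical address 0x200000, the encoded value would be
// 0x200000 >> 12 == 0x200, combined with APERTURE == VIDEO_MEMORY and one of
// the two PCF values selected above.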

static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
{
    NvU64 pde_bits = 0;

    if (phys_alloc != NULL) {
        NvU64 address = phys_alloc->addr.address >> NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT;

        switch (phys_alloc->addr.aperture) {
            case UVM_APERTURE_SYS:
                pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, SYSTEM_COHERENT_MEMORY);
                break;
            case UVM_APERTURE_VID:
                pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY);
                break;
            default:
                UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
                break;
        }

        // PCF (permission control flags) 5:3
        pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);

        // address 51:8
        pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
    }

    return pde_bits;
}

static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
{
    NvU64 pde_bits = 0;

    if (phys_alloc != NULL) {
        NvU64 address = phys_alloc->addr.address >> NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT;

        switch (phys_alloc->addr.aperture) {
            case UVM_APERTURE_SYS:
                pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, SYSTEM_COHERENT_MEMORY);
                break;
            case UVM_APERTURE_VID:
                pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
                break;
            default:
                UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);
                break;
        }

        // PCF (permission control flags) 69:67 [5:3]
        pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);

        // address 115:76 [51:12]
        pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
    }
    return pde_bits;
}
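
// [Illustrative note, not part of the original file] The two halves use
// different address granularities, as the field comments above indicate: the
// big-page half encodes bits 51:8 (a 256-byte-aligned table address), while
// the small-page half encodes bits 51:12 (a 4K-aligned table address). For
// example, a big-page table at physical 0x1100 would be stored as
// 0x1100 >> 8 == 0x11, and a small-page table at 0x3000 as 0x3000 >> 12 == 0x3.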

static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
{
    NvU32 entry_count = entries_per_index_hopper(depth);
    NvU64 *entry_bits = (NvU64 *)entry;

    if (entry_count == 1) {
        *entry_bits = single_pde_hopper(*phys_allocs, depth);
    }
    else if (entry_count == 2) {
        entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
        entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);

        // This entry applies to the whole dual PDE but is stored in the lower
        // bits
        entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
    }
    else {
        UVM_ASSERT_MSG(0, "Invalid number of entries per index: %d\n", entry_count);
    }
}
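
// [Illustrative usage sketch, not part of the original file] For a dual PDE
// (entries_per_index_hopper(depth) == 2) a caller provides one allocation per
// half and the two halves land in consecutive 64-bit words; the allocation
// names below are hypothetical:
//
//     uvm_mmu_page_table_alloc_t *allocs[2] = {big_table_alloc, small_table_alloc};
//     NvU64 entry[2];
//     make_pde_hopper(entry, allocs, depth);
//     // entry[MMU_BIG] also carries the shared IS_PTE == FALSE bit.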

static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
{
    static bool initialized = false;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *ampere_mmu_mode_hal = uvm_hal_mmu_mode_ampere(big_page_size);
        UVM_ASSERT(ampere_mmu_mode_hal);

        // The assumption made is that arch_hal->mmu_mode_hal() will be called
        // under the global lock the first time, so check it here.
        uvm_assert_mutex_locked(&g_uvm_global.global_lock);

        hopper_mmu_mode_hal = *ampere_mmu_mode_hal;
        hopper_mmu_mode_hal.entry_size = entry_size_hopper;
        hopper_mmu_mode_hal.index_bits = index_bits_hopper;
        hopper_mmu_mode_hal.entries_per_index = entries_per_index_hopper;
        hopper_mmu_mode_hal.entry_offset = entry_offset_hopper;
        hopper_mmu_mode_hal.num_va_bits = num_va_bits_hopper;
        hopper_mmu_mode_hal.allocation_size = allocation_size_hopper;
        hopper_mmu_mode_hal.page_table_depth = page_table_depth_hopper;
        hopper_mmu_mode_hal.make_pte = make_pte_hopper;
        hopper_mmu_mode_hal.make_sked_reflected_pte = make_sked_reflected_pte_hopper;
        hopper_mmu_mode_hal.make_sparse_pte = make_sparse_pte_hopper;
        hopper_mmu_mode_hal.unmapped_pte = unmapped_pte_hopper;
        hopper_mmu_mode_hal.poisoned_pte = poisoned_pte_hopper;
        hopper_mmu_mode_hal.make_pde = make_pde_hopper;

        initialized = true;
    }

    return &hopper_mmu_mode_hal;
}
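
// [Illustrative usage sketch, not part of the original file] The function
// above follows the usual HAL-inheritance pattern: copy the Ampere mode HAL,
// then override the Hopper-specific entry points. A caller might use it as:
//
//     uvm_mmu_mode_hal_t *hal = uvm_hal_mmu_mode_hopper(UVM_PAGE_SIZE_64K);
//     NvU64 pte = hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
//
// where phys_addr stands for a hypothetical vidmem physical address.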

NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    switch (client_id) {
        case NV_PFAULT_CLIENT_GPC_RAST:
        case NV_PFAULT_CLIENT_GPC_GCC:
        case NV_PFAULT_CLIENT_GPC_GPCCS:
            return UVM_HOPPER_GPC_UTLB_ID_RGG;
        case NV_PFAULT_CLIENT_GPC_T1_0:
            return UVM_HOPPER_GPC_UTLB_ID_LTP0;
        case NV_PFAULT_CLIENT_GPC_T1_1:
        case NV_PFAULT_CLIENT_GPC_PE_0:
        case NV_PFAULT_CLIENT_GPC_TPCCS_0:
            return UVM_HOPPER_GPC_UTLB_ID_LTP1;
        case NV_PFAULT_CLIENT_GPC_T1_2:
            return UVM_HOPPER_GPC_UTLB_ID_LTP2;
        case NV_PFAULT_CLIENT_GPC_T1_3:
        case NV_PFAULT_CLIENT_GPC_PE_1:
        case NV_PFAULT_CLIENT_GPC_TPCCS_1:
            return UVM_HOPPER_GPC_UTLB_ID_LTP3;
        case NV_PFAULT_CLIENT_GPC_T1_4:
            return UVM_HOPPER_GPC_UTLB_ID_LTP4;
        case NV_PFAULT_CLIENT_GPC_T1_5:
        case NV_PFAULT_CLIENT_GPC_PE_2:
        case NV_PFAULT_CLIENT_GPC_TPCCS_2:
            return UVM_HOPPER_GPC_UTLB_ID_LTP5;
        case NV_PFAULT_CLIENT_GPC_T1_6:
            return UVM_HOPPER_GPC_UTLB_ID_LTP6;
        case NV_PFAULT_CLIENT_GPC_T1_7:
        case NV_PFAULT_CLIENT_GPC_PE_3:
        case NV_PFAULT_CLIENT_GPC_TPCCS_3:
            return UVM_HOPPER_GPC_UTLB_ID_LTP7;
        case NV_PFAULT_CLIENT_GPC_T1_8:
            return UVM_HOPPER_GPC_UTLB_ID_LTP8;
        case NV_PFAULT_CLIENT_GPC_T1_9:
        case NV_PFAULT_CLIENT_GPC_PE_4:
        case NV_PFAULT_CLIENT_GPC_TPCCS_4:
            return UVM_HOPPER_GPC_UTLB_ID_LTP9;
        case NV_PFAULT_CLIENT_GPC_T1_10:
            return UVM_HOPPER_GPC_UTLB_ID_LTP10;
        case NV_PFAULT_CLIENT_GPC_T1_11:
        case NV_PFAULT_CLIENT_GPC_PE_5:
        case NV_PFAULT_CLIENT_GPC_TPCCS_5:
            return UVM_HOPPER_GPC_UTLB_ID_LTP11;
        case NV_PFAULT_CLIENT_GPC_T1_12:
            return UVM_HOPPER_GPC_UTLB_ID_LTP12;
        case NV_PFAULT_CLIENT_GPC_T1_13:
        case NV_PFAULT_CLIENT_GPC_PE_6:
        case NV_PFAULT_CLIENT_GPC_TPCCS_6:
            return UVM_HOPPER_GPC_UTLB_ID_LTP13;
        case NV_PFAULT_CLIENT_GPC_T1_14:
            return UVM_HOPPER_GPC_UTLB_ID_LTP14;
        case NV_PFAULT_CLIENT_GPC_T1_15:
        case NV_PFAULT_CLIENT_GPC_PE_7:
        case NV_PFAULT_CLIENT_GPC_TPCCS_7:
            return UVM_HOPPER_GPC_UTLB_ID_LTP15;
        case NV_PFAULT_CLIENT_GPC_T1_16:
            return UVM_HOPPER_GPC_UTLB_ID_LTP16;
        case NV_PFAULT_CLIENT_GPC_T1_17:
        case NV_PFAULT_CLIENT_GPC_PE_8:
        case NV_PFAULT_CLIENT_GPC_TPCCS_8:
            return UVM_HOPPER_GPC_UTLB_ID_LTP17;

        default:
            UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
    }

    return 0;
}
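
// [Illustrative summary, not part of the original file] The mapping above
// follows a regular pattern: the GPC-level clients (RAST, GCC, GPCCS) share
// the RGG uTLB, each even-numbered T1 client gets its own LTP uTLB, and each
// odd-numbered T1 client T1_(2n+1) shares uTLB LTP(2n+1) with PE_n and
// TPCCS_n.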

@@ -1,5 +1,5 @@
/*******************************************************************************
-    Copyright (c) 2020-2021 NVIDIA Corporation
+    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -46,7 +46,7 @@ static NV_STATUS test_semaphore_alloc_uvm_rm_mem(uvm_gpu_t *gpu, const size_t si
    uvm_rm_mem_t *mem = NULL;
    NvU64 gpu_va;

-    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, &mem);
+    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &mem);
    TEST_NV_CHECK_RET(status);

    gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);

@@ -34,31 +34,6 @@
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//

#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping)
{
    memset(mapping, 0, sizeof(*mapping));
    INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);

#if defined(NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK)
    //
    // The .tree_lock member variable was changed from type rwlock_t, to
    // spinlock_t, on 25 July 2008, by mainline commit
    // 19fd6231279be3c3bdd02ed99f9b0eb195978064.
    //
    rwlock_init(&mapping->tree_lock);
#else
    spin_lock_init(&mapping->tree_lock);
#endif

    spin_lock_init(&mapping->i_mmap_lock);
    INIT_LIST_HEAD(&mapping->private_list);
    spin_lock_init(&mapping->private_lock);
    INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
    INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif

#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{

@@ -88,16 +88,12 @@

#include "nv-kthread-q.h"

-#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
+#if defined(NV_CPUMASK_OF_NODE_PRESENT)
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#else
#define UVM_THREAD_AFFINITY_SUPPORTED() 0
#endif

// The ARM arch lacks support for cpumask_of_node() until kernel 4.7. It was
// added via commit 1a2db300348b ("arm64, numa: Add NUMA support for arm64
// platforms.") Callers should either check UVM_THREAD_AFFINITY_SUPPORTED()

@@ -112,16 +108,12 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#endif
}

#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else
#define UVM_IS_CONFIG_HMM() 0
#endif

// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit

@@ -137,24 +129,15 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
//
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
// v3.19 (2014-11-13).

#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
#define UVM_CAN_USE_MMU_NOTIFIERS() 1
#else
#define UVM_CAN_USE_MMU_NOTIFIERS() 0
#endif

// See bug 1707453 for further details about setting the minimum kernel version.
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-# error This driver does not support kernels older than 2.6.32!
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+# error This driver does not support kernels older than 3.10!
#endif

#if !defined(VM_RESERVED)

@@ -234,10 +217,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)

#define NV_UVM_GFP_FLAGS (GFP_KERNEL)

#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping);
#endif

// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,

@@ -369,23 +348,6 @@ static inline NvU64 NV_GETTIME(void)
         (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif

// bitmap_clear was added in 2.6.33 via commit c1a2a962a2ad103846e7950b4591471fabecece7
#if !defined(NV_BITMAP_CLEAR_PRESENT)
static inline void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
    unsigned int index = start;
    for_each_set_bit_from(index, map, start + len)
        __clear_bit(index, map);
}

static inline void bitmap_set(unsigned long *map, unsigned int start, int len)
{
    unsigned int index = start;
    for_each_clear_bit_from(index, map, start + len)
        __set_bit(index, map);
}
#endif
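
// [Illustrative note, not part of the original file] These fallbacks mirror
// the upstream kernel semantics: the affected bit range is [start, start + len).
// For example, bitmap_set(map, 3, 4) sets bits 3..6 and bitmap_clear(map, 3, 4)
// clears those same bits.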

// Added in 2.6.24
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

@@ -456,17 +418,6 @@ static inline NvU64 NV_GETTIME(void)
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
#endif

// Added in 2.6.37 via commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
#if !defined(NV_VZALLOC_PRESENT)
static inline void *vzalloc(unsigned long size)
{
    void *p = vmalloc(size);
    if (p)
        memset(p, 0, size);
    return p;
}
#endif
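
// [Illustrative note, not part of the original file] The fallback simply pairs
// vmalloc() with memset(), matching the upstream helper: memory returned by
// vzalloc() is zero-filled and is released with vfree(), e.g.
//
//     int *table = vzalloc(1024 * sizeof(*table));   // all entries start at 0
//     ...
//     vfree(table);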

// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \

@@ -522,28 +473,12 @@ static bool radix_tree_empty(struct radix_tree_root *tree)
#endif
#endif

#if !defined(NV_USLEEP_RANGE_PRESENT)
static void __sched usleep_range(unsigned long min, unsigned long max)
{
    unsigned min_msec = min / 1000;
    unsigned max_msec = max / 1000;

    if (min_msec != 0)
        msleep(min_msec);
    else if (max_msec != 0)
        msleep(max_msec);
    else
        msleep(1);
}
#endif
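
// [Illustrative note, not part of the original file] The fallback rounds the
// microsecond range down to milliseconds and always sleeps at least 1 ms. For
// example, usleep_range(500, 2000) gives min_msec == 0 and max_msec == 2, so
// it calls msleep(2); usleep_range(100, 900) rounds both values to 0 and falls
// back to msleep(1).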

typedef struct
{
    struct mem_cgroup *new_memcg;
    struct mem_cgroup *old_memcg;
} uvm_memcg_context_t;

// cgroup support requires set_active_memcg(). set_active_memcg() is an
// inline function that requires int_active_memcg per-cpu symbol when called
// from interrupt context. int_active_memcg is only exported by commit

@@ -585,29 +520,13 @@ typedef struct
    }
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg

#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#include <asm/pgtable_types.h>
#endif

#if !defined(PAGE_KERNEL_NOENC)
#define PAGE_KERNEL_NOENC PAGE_KERNEL
#endif

// Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7).
//

@@ -27,12 +27,8 @@

const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 26);

    switch (lock_order) {
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
@@ -48,14 +44,8 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_API);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_GPUS);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_BLOCK);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);
@@ -344,7 +334,7 @@ bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *uvm_context)
    return false;
}

-bool __uvm_thread_check_all_unlocked()
+bool __uvm_thread_check_all_unlocked(void)
{
    return __uvm_check_all_unlocked(uvm_thread_context_lock_get());
}
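
// [Illustrative note, not part of the original file] The change above replaces
// an old-style declarator with a proper prototype: in C, `bool f()` leaves the
// parameter list unspecified, so callers passing arguments would not be
// diagnosed, whereas `bool f(void)` declares that the function takes no
// arguments.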

@@ -276,16 +276,6 @@
// Operations not allowed while holding the lock:
// - GPU memory allocation which can evict memory (would require nesting
//   block locks)

// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
//   gpu->sysmem_mappings.bitlock)
//   Order: UVM_LOCK_ORDER_CHUNK_MAPPING

@@ -320,20 +310,6 @@
// Operations not allowed while holding this lock
// - GPU memory allocation which can evict
//

// - Concurrent push semaphore
//   Order: UVM_LOCK_ORDER_PUSH
//   Semaphore (uvm_semaphore_t)

@@ -361,7 +337,9 @@
//
// - Channel lock
//   Order: UVM_LOCK_ORDER_CHANNEL
-//   Spinlock (uvm_spinlock_t)
+//   Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
//
//   Lock protecting the state of all the channels in a channel pool.
//
// - Tools global VA space list lock (g_tools_va_space_list_lock)
//   Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST

@@ -408,14 +386,8 @@ typedef enum
    UVM_LOCK_ORDER_RM_API,
    UVM_LOCK_ORDER_RM_GPUS,
    UVM_LOCK_ORDER_VA_BLOCK,
    UVM_LOCK_ORDER_CHUNK_MAPPING,
    UVM_LOCK_ORDER_PAGE_TREE,
    UVM_LOCK_ORDER_PUSH,
    UVM_LOCK_ORDER_PMM,
    UVM_LOCK_ORDER_PMM_PMA,
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user