mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-28 20:19:48 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6dd092ddb7 | ||
|
|
4397463e73 | ||
|
|
e598191e8e | ||
|
|
1dc88ff75e | ||
|
|
811073c51e | ||
|
|
dac2350c7f | ||
|
|
9594cc0169 | ||
|
|
5f40a5aee5 | ||
|
|
758b4ee818 | ||
|
|
7c345b838b | ||
|
|
90eb10774f |
75
CHANGELOG.md
75
CHANGELOG.md
@@ -1,5 +1,78 @@
|
||||
# Changelog
|
||||
|
||||
## Release 530 Entries
|
||||
|
||||
### [530.41.03] 2023-03-23
|
||||
|
||||
### [530.30.02] 2023-02-28
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for power management features like Suspend, Hibernate and Resume.
|
||||
|
||||
## Release 525 Entries
|
||||
|
||||
### [525.89.02] 2023-02-08
|
||||
|
||||
### [525.85.12] 2023-01-30
|
||||
|
||||
### [525.85.05] 2023-01-19
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix build problems with Clang 15.0, [#377](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/377) by @ptr1337
|
||||
|
||||
### [525.78.01] 2023-01-05
|
||||
|
||||
### [525.60.13] 2022-12-05
|
||||
|
||||
### [525.60.11] 2022-11-28
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)
|
||||
|
||||
### [525.53] 2022-11-10
|
||||
|
||||
#### Changed
|
||||
|
||||
- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
|
||||
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
|
||||
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
|
||||
- Return EINVAL when [failing to] allocating memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
|
||||
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
|
||||
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.
|
||||
|
||||
## Release 520 Entries
|
||||
|
||||
### [520.61.07] 2022-10-20
|
||||
|
||||
### [520.56.06] 2022-10-12
|
||||
|
||||
#### Added
|
||||
|
||||
- Introduce support for GeForce RTX 4090 GPUs.
|
||||
|
||||
### [520.61.05] 2022-10-10
|
||||
|
||||
#### Added
|
||||
|
||||
- Introduce support for NVIDIA H100 GPUs.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix/Improve Makefile, [#308](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/308/) by @izenynn
|
||||
- Make nvLogBase2 more efficient, [#177](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/177/) by @DMaroo
|
||||
- nv-pci: fixed always true expression, [#195](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/195/) by @ValZapod
|
||||
|
||||
## Release 515 Entries
|
||||
|
||||
### [515.76] 2022-09-20
|
||||
@@ -9,6 +82,8 @@
|
||||
- Improved compatibility with new Linux kernel releases
|
||||
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
|
||||
|
||||
### [515.65.07] 2022-10-19
|
||||
|
||||
### [515.65.01] 2022-08-02
|
||||
|
||||
#### Fixed
|
||||
|
||||
26
Makefile
26
Makefile
@@ -6,9 +6,9 @@
|
||||
# To install the build kernel modules: run (as root) `make modules_install`
|
||||
###########################################################################
|
||||
|
||||
include utils.mk
|
||||
|
||||
all: modules
|
||||
###########################################################################
|
||||
# variables
|
||||
###########################################################################
|
||||
|
||||
nv_kernel_o = src/nvidia/$(OUTPUTDIR)/nv-kernel.o
|
||||
nv_kernel_o_binary = kernel-open/nvidia/nv-kernel.o_binary
|
||||
@@ -16,13 +16,20 @@ nv_kernel_o_binary = kernel-open/nvidia/nv-kernel.o_binary
|
||||
nv_modeset_kernel_o = src/nvidia-modeset/$(OUTPUTDIR)/nv-modeset-kernel.o
|
||||
nv_modeset_kernel_o_binary = kernel-open/nvidia-modeset/nv-modeset-kernel.o_binary
|
||||
|
||||
.PHONY: $(nv_kernel_o) $(nv_modeset_kernel_o) modules modules_install
|
||||
###########################################################################
|
||||
# rules
|
||||
###########################################################################
|
||||
|
||||
include utils.mk
|
||||
|
||||
.PHONY: all
|
||||
all: modules
|
||||
|
||||
###########################################################################
|
||||
# nv-kernel.o is the OS agnostic portion of nvidia.ko
|
||||
###########################################################################
|
||||
|
||||
.PHONY: $(nv_kernel_o)
|
||||
$(nv_kernel_o):
|
||||
$(MAKE) -C src/nvidia
|
||||
|
||||
@@ -34,6 +41,7 @@ $(nv_kernel_o_binary): $(nv_kernel_o)
|
||||
# nv-modeset-kernel.o is the OS agnostic portion of nvidia-modeset.ko
|
||||
###########################################################################
|
||||
|
||||
.PHONY: $(nv_modeset_kernel_o)
|
||||
$(nv_modeset_kernel_o):
|
||||
$(MAKE) -C src/nvidia-modeset
|
||||
|
||||
@@ -46,31 +54,33 @@ $(nv_modeset_kernel_o_binary): $(nv_modeset_kernel_o)
|
||||
# the kernel modules with kbuild.
|
||||
###########################################################################
|
||||
|
||||
.PHONY: modules
|
||||
modules: $(nv_kernel_o_binary) $(nv_modeset_kernel_o_binary)
|
||||
$(MAKE) -C kernel-open modules
|
||||
|
||||
|
||||
###########################################################################
|
||||
# Install the built kernel modules using kbuild.
|
||||
###########################################################################
|
||||
|
||||
.PHONY: modules_install
|
||||
modules_install:
|
||||
$(MAKE) -C kernel-open modules_install
|
||||
|
||||
|
||||
###########################################################################
|
||||
# clean
|
||||
###########################################################################
|
||||
|
||||
.PHONY: clean nvidia.clean nvidia-modeset.clean kernel-open.clean
|
||||
|
||||
.PHONY: clean
|
||||
clean: nvidia.clean nvidia-modeset.clean kernel-open.clean
|
||||
|
||||
.PHONY: nvidia.clean
|
||||
nvidia.clean:
|
||||
$(MAKE) -C src/nvidia clean
|
||||
|
||||
.PHONY: nvidia-modeset.clean
|
||||
nvidia-modeset.clean:
|
||||
$(MAKE) -C src/nvidia-modeset clean
|
||||
|
||||
.PHONY: kernel-open.clean
|
||||
kernel-open.clean:
|
||||
$(MAKE) -C kernel-open clean
|
||||
|
||||
66
README.md
66
README.md
@@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 515.76.
|
||||
version 530.41.03.
|
||||
|
||||
|
||||
## How to Build
|
||||
@@ -15,9 +15,9 @@ as root:
|
||||
|
||||
make modules_install -j$(nproc)
|
||||
|
||||
Note that the kernel modules built here must be used with gsp.bin
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
515.76 driver release. This can be achieved by installing
|
||||
530.41.03 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@@ -167,7 +167,7 @@ for the target kernel.
|
||||
## Compatible GPUs
|
||||
|
||||
The open-gpu-kernel-modules can be used on any Turing or later GPU
|
||||
(see the table below). However, in the 515.76 release,
|
||||
(see the table below). However, in the 530.41.03 release,
|
||||
GeForce and Workstation support is still considered alpha-quality.
|
||||
|
||||
To enable use of the open kernel modules on GeForce and Workstation GPUs,
|
||||
@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
|
||||
parameter to 1. For more details, see the NVIDIA GPU driver end user
|
||||
README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/515.76/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/530.41.03/README/kernel_open.html
|
||||
|
||||
In the below table, if three IDs are listed, the first is the PCI Device
|
||||
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
|
||||
@@ -645,13 +645,23 @@ Subsystem Device ID.
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
|
||||
| NVIDIA PG506-242 | 20B3 10DE 14A7 |
|
||||
| NVIDIA PG506-243 | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
|
||||
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
|
||||
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
@@ -685,6 +695,7 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 3090 Ti | 2203 |
|
||||
| NVIDIA GeForce RTX 3090 | 2204 |
|
||||
| NVIDIA GeForce RTX 3080 | 2206 |
|
||||
| NVIDIA GeForce RTX 3070 Ti | 2207 |
|
||||
| NVIDIA GeForce RTX 3080 Ti | 2208 |
|
||||
| NVIDIA GeForce RTX 3080 | 220A |
|
||||
| NVIDIA CMP 90HX | 220D |
|
||||
@@ -709,6 +720,13 @@ Subsystem Device ID.
|
||||
| NVIDIA A10 | 2236 10DE 1482 |
|
||||
| NVIDIA A10G | 2237 10DE 152F |
|
||||
| NVIDIA A10M | 2238 10DE 1677 |
|
||||
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
|
||||
| NVIDIA H800 | 2324 10DE 17A6 |
|
||||
| NVIDIA H800 | 2324 10DE 17A8 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
@@ -736,6 +754,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
|
||||
| NVIDIA RTX A4500 Laptop GPU | 24BA |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
|
||||
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
|
||||
@@ -751,12 +770,14 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A2000 | 2531 103C 151D |
|
||||
| NVIDIA RTX A2000 | 2531 10DE 151D |
|
||||
| NVIDIA RTX A2000 | 2531 17AA 151D |
|
||||
| NVIDIA GeForce RTX 3060 | 2544 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
|
||||
| NVIDIA GeForce RTX 3050 | 2582 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
|
||||
@@ -769,6 +790,9 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 2050 | 25A7 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A9 |
|
||||
| NVIDIA GeForce MX570 A | 25AA |
|
||||
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
| NVIDIA A2 | 25B6 10DE 157E |
|
||||
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
|
||||
@@ -778,5 +802,33 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
|
||||
| NVIDIA GeForce RTX 2050 | 25ED |
|
||||
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
|
||||
| NVIDIA RTX A2000 Embedded GPU | 25FA |
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
|
||||
@@ -70,9 +70,13 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
|
||||
|
||||
EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.76\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"530.41.03\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
endif
|
||||
|
||||
EXTRA_CFLAGS += -Wno-unused-function
|
||||
|
||||
@@ -229,6 +233,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_ioctl.h \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
@@ -243,6 +248,8 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/log2.h \
|
||||
linux/of.h \
|
||||
linux/bug.h \
|
||||
linux/sched.h \
|
||||
linux/sched/mm.h \
|
||||
linux/sched/signal.h \
|
||||
linux/sched/task.h \
|
||||
linux/sched/task_stack.h \
|
||||
@@ -256,6 +263,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/platform/tegra/dce/dce-client-ipc.h \
|
||||
linux/nvhost.h \
|
||||
linux/nvhost_t194.h \
|
||||
linux/host1x-next.h \
|
||||
asm/book3s/64/hash-64k.h \
|
||||
asm/set_memory.h \
|
||||
asm/prom.h \
|
||||
@@ -286,7 +294,10 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/ioasid.h \
|
||||
linux/stdarg.h \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h
|
||||
|
||||
# Filename to store the define for the header in $(1); this is only consumed by
|
||||
# the rule below that concatenates all of these together.
|
||||
|
||||
@@ -101,13 +101,6 @@
|
||||
# define NV_ANDROID
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(DceCore) && !defined(NV_DCECORE)
|
||||
# define NV_DCECORE
|
||||
#endif
|
||||
@@ -249,7 +242,7 @@
|
||||
#endif
|
||||
|
||||
/* For verification-only features not intended to be included in normal drivers */
|
||||
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
|
||||
#if defined(ENABLE_VERIF_FEATURES)
|
||||
#define NV_VERIF_FEATURES
|
||||
#endif
|
||||
|
||||
@@ -283,12 +276,6 @@
|
||||
#define NV_IS_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_GSP_MODS)
|
||||
#define NV_IS_GSP_MODS 1
|
||||
#else
|
||||
#define NV_IS_GSP_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_WINDOWS)
|
||||
#define NVOS_IS_WINDOWS 1
|
||||
#else
|
||||
@@ -355,15 +342,6 @@
|
||||
#define NVOS_IS_INTEGRITY 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(NVCPU_X86)
|
||||
#define NVCPU_IS_X86 1
|
||||
#else
|
||||
|
||||
132
kernel-open/common/inc/nv-firmware.h
Normal file
132
kernel-open/common/inc/nv-firmware.h
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NV_FIRMWARE_H
|
||||
#define NV_FIRMWARE_H
|
||||
|
||||
|
||||
|
||||
#include <nvtypes.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_TYPE_GSP,
|
||||
NV_FIRMWARE_TYPE_GSP_LOG
|
||||
} nv_firmware_type_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_CHIP_FAMILY_NULL = 0,
|
||||
NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
|
||||
NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
|
||||
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
|
||||
NV_FIRMWARE_CHIP_FAMILY_END,
|
||||
} nv_firmware_chip_family_t;
|
||||
|
||||
static inline const char *nv_firmware_chip_family_to_string(
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
{
|
||||
switch (fw_chip_family) {
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
|
||||
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
|
||||
//
|
||||
// The function nv_firmware_path will then be available.
|
||||
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
static inline const char *nv_firmware_path(
|
||||
nv_firmware_type_t fw_type,
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
{
|
||||
if (fw_type == NV_FIRMWARE_TYPE_GSP)
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
|
||||
// which will then be invoked (at the top-level) for each
|
||||
// gsp_*.bin (but not gsp_log_*.bin)
|
||||
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
|
||||
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
|
||||
#endif // NV_FIRMWARE_DECLARE_GSP_FILENAME
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
|
||||
* @key: the key of the objects to iterate over
|
||||
*/
|
||||
#define nv_hash_for_each_possible(name, obj, member, key) \
|
||||
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
|
||||
#endif // __NV_HASH_H__
|
||||
|
||||
@@ -27,15 +27,13 @@
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
// Enums for supported hypervisor types.
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
|
||||
typedef enum _HYPERVISOR_TYPE
|
||||
{
|
||||
OS_HYPERVISOR_XEN = 0,
|
||||
OS_HYPERVISOR_VMWARE,
|
||||
OS_HYPERVISOR_HYPERV,
|
||||
OS_HYPERVISOR_KVM,
|
||||
OS_HYPERVISOR_PARALLELS,
|
||||
OS_HYPERVISOR_CUSTOM_FORCED,
|
||||
OS_HYPERVISOR_UNKNOWN
|
||||
} HYPERVISOR_TYPE;
|
||||
|
||||
@@ -92,30 +90,6 @@ typedef enum VGPU_DEVICE_STATE_E
|
||||
NV_VGPU_DEV_IN_USE = 2
|
||||
} VGPU_DEVICE_STATE;
|
||||
|
||||
typedef enum _VMBUS_CMD_TYPE
|
||||
{
|
||||
VMBUS_CMD_TYPE_INVALID = 0,
|
||||
VMBUS_CMD_TYPE_SETUP = 1,
|
||||
VMBUS_CMD_TYPE_SENDPACKET = 2,
|
||||
VMBUS_CMD_TYPE_CLEANUP = 3,
|
||||
} VMBUS_CMD_TYPE;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 request_id;
|
||||
NvU32 page_count;
|
||||
NvU64 *pPfns;
|
||||
void *buffer;
|
||||
NvU32 bufferlen;
|
||||
} vmbus_send_packet_cmd_params;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 override_sint;
|
||||
NvU8 *nv_guid;
|
||||
} vmbus_setup_cmd_params;
|
||||
|
||||
/*
|
||||
* Function prototypes
|
||||
*/
|
||||
|
||||
@@ -104,7 +104,7 @@ typedef struct nv_ioctl_rm_api_version
|
||||
|
||||
#define NV_RM_API_VERSION_CMD_STRICT 0
|
||||
#define NV_RM_API_VERSION_CMD_RELAXED '1'
|
||||
#define NV_RM_API_VERSION_CMD_OVERRIDE '2'
|
||||
#define NV_RM_API_VERSION_CMD_QUERY '2'
|
||||
|
||||
#define NV_RM_API_VERSION_REPLY_UNRECOGNIZED 0
|
||||
#define NV_RM_API_VERSION_REPLY_RECOGNIZED 1
|
||||
|
||||
@@ -115,11 +115,6 @@ struct nv_kthread_q_item
|
||||
void *function_args;
|
||||
};
|
||||
|
||||
#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
|
||||
#else
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
|
||||
#endif
|
||||
|
||||
#ifndef NUMA_NO_NODE
|
||||
#define NUMA_NO_NODE (-1)
|
||||
@@ -142,18 +137,12 @@ struct nv_kthread_q_item
|
||||
//
|
||||
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
|
||||
//
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node if
|
||||
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
|
||||
// fallback to another node is possible because kernel allocators do not
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node,
|
||||
// but fallback to another node is possible because kernel allocators do not
|
||||
// guarantee affinity. Note that NUMA-affinity applies only to
|
||||
// the kthread stack. This API does not do anything about limiting the CPU
|
||||
// affinity of the kthread. That is left to the caller.
|
||||
//
|
||||
// On kernels, which do not support NUMA-aware kthread stack allocations
|
||||
// (NV_KTHTREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
|
||||
// if the value supplied for 'preferred_node' is anything other than
|
||||
// NV_KTHREAD_NO_NODE.
|
||||
//
|
||||
// Reusing a queue: once a queue is initialized, it must be safely shut down
|
||||
// (see "Stopping the queue(s)", below), before it can be reused. So, for
|
||||
// a simple queue use case, the following will work:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -191,13 +191,6 @@
|
||||
*/
|
||||
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
|
||||
|
||||
#if !defined(NV_KUID_T_PRESENT)
|
||||
static inline uid_t __kuid_val(uid_t uid)
|
||||
{
|
||||
return uid;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_VGA_ARB)
|
||||
#include <linux/vgaarb.h>
|
||||
#endif
|
||||
@@ -234,18 +227,6 @@ static inline uid_t __kuid_val(uid_t uid)
|
||||
#include <asm-generic/pci-dma-compat.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
|
||||
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
|
||||
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
|
||||
#else
|
||||
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
|
||||
#endif
|
||||
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
|
||||
#define NV_EFI_ENABLED() efi_enabled
|
||||
#else
|
||||
#define NV_EFI_ENABLED() 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_CRAY_XT)
|
||||
#include <cray/cray_nvidia.h>
|
||||
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
|
||||
@@ -521,7 +502,7 @@ static inline void *nv_vmalloc(unsigned long size)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_vfree(void *ptr, NvU32 size)
|
||||
static inline void nv_vfree(void *ptr, NvU64 size)
|
||||
{
|
||||
NV_MEMDBG_REMOVE(ptr, size);
|
||||
vfree(ptr);
|
||||
@@ -592,11 +573,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
{
|
||||
if (node_id < 0 || node_id >= MAX_NUMNODES)
|
||||
return NV_FALSE;
|
||||
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
|
||||
return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#else
|
||||
return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_KMALLOC(ptr, size) \
|
||||
@@ -606,6 +583,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KZALLOC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
|
||||
if (ptr) \
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KMALLOC_ATOMIC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
|
||||
@@ -649,6 +633,26 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
free_pages(ptr, order); \
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
|
||||
{
|
||||
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
|
||||
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
|
||||
/*
|
||||
* When AMD memory encryption is enabled, device memory mappings with the
|
||||
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
|
||||
*
|
||||
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
|
||||
*/
|
||||
#if defined(NV_CC_MKDEC_PRESENT)
|
||||
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
|
||||
#else
|
||||
prot = pgprot_decrypted(prot);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
#if defined(PAGE_KERNEL_NOENC)
|
||||
#if defined(__pgprot_mask)
|
||||
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
|
||||
@@ -670,7 +674,8 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
|
||||
#if defined(PAGE_KERNEL_NOENC)
|
||||
if (unencrypted)
|
||||
{
|
||||
prot = cached ? PAGE_KERNEL_NOENC : NV_PAGE_KERNEL_NOCACHE_NOENC;
|
||||
prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
|
||||
nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -838,10 +843,8 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
})
|
||||
#endif
|
||||
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
|
||||
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
|
||||
#endif
|
||||
|
||||
#define NV_PRINT_AT(nv_debug_level,at) \
|
||||
@@ -957,26 +960,6 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
|
||||
{
|
||||
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
|
||||
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
|
||||
/*
|
||||
* When AMD memory encryption is enabled, device memory mappings with the
|
||||
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
|
||||
*
|
||||
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
|
||||
*/
|
||||
#if defined(NV_CC_MKDEC_PRESENT)
|
||||
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
|
||||
#else
|
||||
prot = pgprot_decrypted(prot);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
|
||||
NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
|
||||
{
|
||||
@@ -1037,6 +1020,32 @@ static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
/* Converts BAR index to Linux specific PCI BAR index */
|
||||
static inline NvU8 nv_bar_index_to_os_bar_index
|
||||
(
|
||||
struct pci_dev *dev,
|
||||
NvU8 nv_bar_index
|
||||
)
|
||||
{
|
||||
NvU8 bar_index = 0;
|
||||
NvU8 i;
|
||||
|
||||
BUG_ON(nv_bar_index >= NV_GPU_NUM_BARS);
|
||||
|
||||
for (i = 0; i < nv_bar_index; i++)
|
||||
{
|
||||
if (NV_PCI_RESOURCE_FLAGS(dev, bar_index) & PCI_BASE_ADDRESS_MEM_TYPE_64)
|
||||
{
|
||||
bar_index += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
bar_index++;
|
||||
}
|
||||
}
|
||||
|
||||
return bar_index;
|
||||
}
|
||||
|
||||
#define NV_PAGE_MASK (NvU64)(long)PAGE_MASK
|
||||
|
||||
@@ -1113,11 +1122,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
#if defined(NVCPU_X86_64)
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
}
|
||||
#endif
|
||||
*stack = sp;
|
||||
return 0;
|
||||
@@ -1126,7 +1138,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (stack != NULL)
|
||||
if (stack != NULL && rm_is_altstack_in_use())
|
||||
{
|
||||
NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
|
||||
}
|
||||
@@ -1161,16 +1173,6 @@ typedef struct nvidia_pte_s {
|
||||
unsigned int page_count;
|
||||
} nvidia_pte_t;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
typedef struct nv_alloc_s {
|
||||
struct nv_alloc_s *next;
|
||||
struct device *dev;
|
||||
@@ -1181,7 +1183,7 @@ typedef struct nv_alloc_s {
|
||||
NvBool zeroed : 1;
|
||||
NvBool aliased : 1;
|
||||
NvBool user : 1;
|
||||
NvBool node0 : 1;
|
||||
NvBool node : 1;
|
||||
NvBool peer_io : 1;
|
||||
NvBool physical : 1;
|
||||
NvBool unencrypted : 1;
|
||||
@@ -1195,6 +1197,7 @@ typedef struct nv_alloc_s {
|
||||
unsigned int pid;
|
||||
struct page **user_pages;
|
||||
NvU64 guest_id; /* id of guest VM */
|
||||
NvS32 node_id; /* Node id for memory allocation when node is set in flags */
|
||||
void *import_priv;
|
||||
struct sg_table *import_sgt;
|
||||
} nv_alloc_t;
|
||||
@@ -1370,8 +1373,7 @@ typedef struct nv_dma_map_s {
|
||||
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
|
||||
* single-page sg elements on Xen Server.
|
||||
*/
|
||||
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
|
||||
!defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#if !defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
|
||||
((sg_alloc_table_from_pages(&sm->sgt, \
|
||||
&dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \
|
||||
@@ -1413,34 +1415,6 @@ struct os_wait_queue {
|
||||
struct completion q;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* To report error in msi/msix when unhandled count reaches a threshold
|
||||
*/
|
||||
@@ -1464,18 +1438,23 @@ struct nv_dma_device {
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
*
|
||||
* This structure retains the pointer to the device,
|
||||
* and any other baggage we want to carry along
|
||||
*
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
nvidia_stack_t *sp;
|
||||
struct acpi_device *device;
|
||||
struct acpi_handle *handle;
|
||||
void *notifier_data;
|
||||
int notify_handler_installed;
|
||||
} nv_acpi_t;
|
||||
#endif
|
||||
|
||||
/* linux-specific version of old nv_state_t */
|
||||
/* this is a general os-specific state structure. the first element *must* be
|
||||
@@ -1492,11 +1471,6 @@ typedef struct nv_linux_state_s {
|
||||
/* IBM-NPU info associated with this GPU */
|
||||
nv_ibmnpu_info_t *npu;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* NUMA node information for the platforms where GPU memory is presented
|
||||
* as a NUMA node to the kernel */
|
||||
struct {
|
||||
@@ -1576,25 +1550,13 @@ typedef struct nv_linux_state_s {
|
||||
/* Per-device notifier block for ACPI events */
|
||||
struct notifier_block acpi_nb;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
nv_acpi_t* nv_acpi_object;
|
||||
#endif
|
||||
|
||||
/* Lock serializing ISRs for different SOC vectors */
|
||||
nv_spinlock_t soc_isr_lock;
|
||||
void *soc_bh_mutex;
|
||||
|
||||
struct nv_timer snapshot_timer;
|
||||
nv_spinlock_t snapshot_timer_lock;
|
||||
@@ -1640,24 +1602,6 @@ extern struct rw_semaphore nv_system_pm_lock;
|
||||
|
||||
extern NvBool nv_ats_supported;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
*
|
||||
* This structure retains the pointer to the device,
|
||||
* and any other baggage we want to carry along
|
||||
*
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
nvidia_stack_t *sp;
|
||||
struct acpi_device *device;
|
||||
struct acpi_handle *handle;
|
||||
int notify_handler_installed;
|
||||
} nv_acpi_t;
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* file-private data
|
||||
* hide a pointer to our data structures in a file-private ptr
|
||||
@@ -1714,6 +1658,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
|
||||
|
||||
#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)
|
||||
|
||||
static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
down(&nvlfp->fops_sp_lock[which]);
|
||||
return nvlfp->fops_sp[which];
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
up(&nvlfp->fops_sp_lock[which]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_ATOMIC_READ(data) atomic_read(&(data))
|
||||
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))
|
||||
@@ -1760,12 +1725,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
|
||||
return cache;
|
||||
}
|
||||
|
||||
|
||||
#if defined(CONFIG_PCI_IOV)
|
||||
#define NV_PCI_SRIOV_SUPPORT
|
||||
#endif /* CONFIG_PCI_IOV */
|
||||
|
||||
|
||||
#define NV_PCIE_CFG_MAX_OFFSET 0x1000
|
||||
|
||||
#include "nv-proto.h"
|
||||
@@ -1788,6 +1751,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
|
||||
|
||||
extern NvU32 NVreg_EnableUserNUMAManagement;
|
||||
extern NvU32 NVreg_RegisterPCIDriver;
|
||||
extern NvU32 NVreg_EnableResizableBar;
|
||||
|
||||
extern NvU32 num_probed_nv_devices;
|
||||
extern NvU32 num_nv_devices;
|
||||
@@ -1944,25 +1908,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
|
||||
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
|
||||
#endif
|
||||
|
||||
#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
|
||||
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
|
||||
#else
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
|
||||
#endif
|
||||
|
||||
#define MODULE_BASE_NAME "nvidia"
|
||||
#define MODULE_INSTANCE_NUMBER 0
|
||||
#define MODULE_INSTANCE_STRING ""
|
||||
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
|
||||
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
|
||||
|
||||
static inline void nv_mutex_destroy(struct mutex *lock)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -73,21 +73,4 @@
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
|
||||
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
hlist_for_each_entry(pos, head, member)
|
||||
#else
|
||||
#if !defined(hlist_entry_safe)
|
||||
#define hlist_entry_safe(ptr, type, member) \
|
||||
(ptr) ? hlist_entry(ptr, type, member) : NULL
|
||||
#endif
|
||||
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
|
||||
pos; \
|
||||
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
|
||||
#endif
|
||||
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT
|
||||
|
||||
#endif // __NV_LIST_HELPERS_H__
|
||||
|
||||
@@ -29,6 +29,25 @@
|
||||
typedef int vm_fault_t;
|
||||
#endif
|
||||
|
||||
/* pin_user_pages
|
||||
* Presence of pin_user_pages() also implies the presence of unpin-user_page().
|
||||
* Both were added in the v5.6-rc1
|
||||
*
|
||||
* pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#define NV_UNPIN_USER_PAGE unpin_user_page
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
|
||||
#define NV_UNPIN_USER_PAGE put_page
|
||||
#endif // NV_PIN_USER_PAGES_PRESENT
|
||||
|
||||
/* get_user_pages
|
||||
*
|
||||
* The 8-argument version of get_user_pages was deprecated by commit
|
||||
@@ -47,69 +66,57 @@ typedef int vm_fault_t;
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_TASK_STRUCT)
|
||||
#if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(current, current->mm, start, nr_pages, flags,
|
||||
pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
|
||||
return get_user_pages(start, nr_pages, write, force, pages, vmas);
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
return get_user_pages(current, current->mm, start, nr_pages, write,
|
||||
force, pages, vmas);
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
/* pin_user_pages_remote
|
||||
*
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
|
||||
*
|
||||
* pin_user_pages_remote() removed 'tsk' parameter by commit
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11). *
|
||||
*
|
||||
*/
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(start, nr_pages, flags, pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined (NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* get_user_pages_remote() was added by commit 1e9877902dc7
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
|
||||
*
|
||||
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
|
||||
* functions") deprecated the 8-argument version of get_user_pages for the
|
||||
* non-remote case (calling get_user_pages with current and current->mm).
|
||||
*
|
||||
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
|
||||
* version that uses something other than current and current->mm. Use
|
||||
* NV_GET_USER_PAGES if you are refering to current and current->mm.
|
||||
*
|
||||
* Note that get_user_pages_remote() requires the caller to hold a reference on
|
||||
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
|
||||
* This will always be true when using current and current->mm. If the kernel passes
|
||||
@@ -131,71 +138,55 @@ typedef int vm_fault_t;
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
|
||||
#else
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG)
|
||||
#if defined (NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG)
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#else
|
||||
return get_user_pages_remote(mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
pages, vmas);
|
||||
|
||||
#endif
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
|
||||
pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
|
||||
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* The .virtual_address field was effectively renamed to .address, by these
|
||||
@@ -270,4 +261,22 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
|
||||
{
|
||||
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
|
||||
vm_flags_set(vma, flags);
|
||||
#else
|
||||
vma->vm_flags |= flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
|
||||
{
|
||||
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
|
||||
vm_flags_clear(vma, flags);
|
||||
#else
|
||||
vma->vm_flags &= ~flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // __NV_MM_H__
|
||||
|
||||
@@ -27,16 +27,8 @@
|
||||
#include <linux/pci.h>
|
||||
#include "nv-linux.h"
|
||||
|
||||
#if defined(NV_DEV_IS_PCI_PRESENT)
|
||||
#define nv_dev_is_pci(dev) dev_is_pci(dev)
|
||||
#else
|
||||
/*
|
||||
* Non-PCI devices are only supported on kernels which expose the
|
||||
* dev_is_pci() function. For older kernels, we only support PCI
|
||||
* devices, hence returning true to take all the PCI code paths.
|
||||
*/
|
||||
#define nv_dev_is_pci(dev) (true)
|
||||
#endif
|
||||
#define NV_GPU_BAR1 1
|
||||
#define NV_GPU_BAR3 3
|
||||
|
||||
int nv_pci_register_driver(void);
|
||||
void nv_pci_unregister_driver(void);
|
||||
|
||||
@@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
__entry; \
|
||||
})
|
||||
|
||||
/*
|
||||
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
|
||||
* Use the older interface instead unless the newer interface is necessary.
|
||||
*/
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
proc_mkdir_mode(name, mode, parent)
|
||||
#else
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
({ \
|
||||
struct proc_dir_entry *__entry; \
|
||||
__entry = create_proc_entry(name, mode, parent); \
|
||||
__entry; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define NV_CREATE_PROC_DIR(name,parent) \
|
||||
({ \
|
||||
@@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
#define NV_PDE_DATA(inode) PDE_DATA(inode)
|
||||
#endif
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
proc_remove(entry);
|
||||
#else
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
#endif
|
||||
|
||||
void nv_procfs_unregister_all(struct proc_dir_entry *entry,
|
||||
struct proc_dir_entry *delimiter);
|
||||
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
|
||||
static int nv_procfs_open_##name( \
|
||||
struct inode *inode, \
|
||||
|
||||
@@ -27,9 +27,6 @@
|
||||
#include "nv-pci.h"
|
||||
#include "nv-register-module.h"
|
||||
|
||||
|
||||
|
||||
|
||||
extern const char *nv_device_name;
|
||||
extern nvidia_module_t nv_fops;
|
||||
|
||||
@@ -57,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
|
||||
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
|
||||
void nv_free_system_pages (nv_alloc_t *);
|
||||
|
||||
void nv_address_space_init_once (struct address_space *mapping);
|
||||
|
||||
int nv_uvm_init (void);
|
||||
void nv_uvm_exit (void);
|
||||
NV_STATUS nv_uvm_suspend (void);
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <nvstatus.h>
|
||||
#include "nv_stdarg.h"
|
||||
#include <nv-caps.h>
|
||||
#include <nv-firmware.h>
|
||||
#include <nv-ioctl.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
@@ -47,11 +48,6 @@ extern nv_cap_t *nvidia_caps_root;
|
||||
|
||||
extern const NvBool nv_is_rm_firmware_supported_os;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
/* NVIDIA's reserved major character device number (Linux). */
|
||||
@@ -165,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_MAUD,
|
||||
TEGRASOC_WHICH_CLK_AZA_2XBIT,
|
||||
TEGRASOC_WHICH_CLK_AZA_BIT,
|
||||
TEGRA234_CLK_MIPI_CAL,
|
||||
TEGRA234_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_SOR0_DIV,
|
||||
TEGRASOC_WHICH_CLK_DISP_ROOT,
|
||||
TEGRASOC_WHICH_CLK_HUB_ROOT,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISP,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
|
||||
TEGRASOC_WHICH_CLK_PLLA,
|
||||
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
|
||||
} TEGRASOC_WHICH_CLK;
|
||||
|
||||
@@ -286,6 +288,7 @@ typedef struct nv_usermap_access_params_s
|
||||
NvU64 access_size;
|
||||
NvU64 remap_prot_extra;
|
||||
NvBool contig;
|
||||
NvU32 caching;
|
||||
} nv_usermap_access_params_t;
|
||||
|
||||
/*
|
||||
@@ -303,14 +306,16 @@ typedef struct nv_alloc_mapping_context_s {
|
||||
NvU64 remap_prot_extra;
|
||||
NvU32 prot;
|
||||
NvBool valid;
|
||||
NvU32 caching;
|
||||
} nv_alloc_mapping_context_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_SOC_IRQ_DISPLAY_TYPE,
|
||||
NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
|
||||
NV_SOC_IRQ_DPAUX_TYPE,
|
||||
NV_SOC_IRQ_GPIO_TYPE,
|
||||
NV_SOC_IRQ_HDACODEC_TYPE,
|
||||
NV_SOC_IRQ_TCPC2DISP_TYPE,
|
||||
NV_SOC_IRQ_INVALID_TYPE
|
||||
} nv_soc_irq_type_t;
|
||||
|
||||
@@ -325,12 +330,16 @@ typedef struct nv_soc_irq_info_s {
|
||||
NvU32 gpio_num;
|
||||
NvU32 dpaux_instance;
|
||||
} irq_data;
|
||||
NvS32 ref_count;
|
||||
} nv_soc_irq_info_t;
|
||||
|
||||
#define NV_MAX_SOC_IRQS 6
|
||||
#define NV_MAX_DPAUX_NUM_DEVICES 4
|
||||
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2 // From SOC_DEV_MAPPING
|
||||
|
||||
#define NV_IGPU_LEGACY_STALL_IRQ 70
|
||||
#define NV_IGPU_MAX_STALL_IRQS 3
|
||||
#define NV_IGPU_MAX_NONSTALL_IRQS 1
|
||||
/*
|
||||
* per device state
|
||||
*/
|
||||
@@ -367,6 +376,8 @@ typedef struct nv_state_t
|
||||
nv_aperture_t *hdacodec_regs;
|
||||
nv_aperture_t *mipical_regs;
|
||||
nv_aperture_t *fb, ud;
|
||||
nv_aperture_t *simregs;
|
||||
nv_aperture_t *emc_regs;
|
||||
|
||||
NvU32 num_dpaux_instance;
|
||||
NvU32 interrupt_line;
|
||||
@@ -375,10 +386,17 @@ typedef struct nv_state_t
|
||||
NvS32 current_soc_irq;
|
||||
NvU32 num_soc_irqs;
|
||||
NvU32 hdacodec_irq;
|
||||
NvU32 tcpc2disp_irq;
|
||||
NvU8 *soc_dcb_blob;
|
||||
NvU32 soc_dcb_size;
|
||||
NvU32 disp_sw_soc_chip_id;
|
||||
NvBool soc_is_dpalt_mode_supported;
|
||||
|
||||
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
|
||||
NvU32 igpu_nonstall_irq;
|
||||
NvU32 num_stall_irqs;
|
||||
NvU64 dma_mask;
|
||||
|
||||
NvBool primary_vga;
|
||||
|
||||
NvU32 sim_env;
|
||||
@@ -424,9 +442,6 @@ typedef struct nv_state_t
|
||||
/* Variable to force allocation of 32-bit addressable memory */
|
||||
NvBool force_dma32_alloc;
|
||||
|
||||
/* Variable to track if device has entered dynamic power state */
|
||||
NvBool dynamic_power_entered;
|
||||
|
||||
/* PCI power state should be D0 during system suspend */
|
||||
NvBool d0_state_in_suspend;
|
||||
|
||||
@@ -456,6 +471,12 @@ typedef struct nv_state_t
|
||||
|
||||
NvBool printed_openrm_enable_unsupported_gpus_error;
|
||||
|
||||
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
|
||||
NvBool nvpcf_dsm_in_gpu_scope;
|
||||
|
||||
/* Bool to check if the device received a shutdown notification */
|
||||
NvBool is_shutdown;
|
||||
|
||||
} nv_state_t;
|
||||
|
||||
// These define need to be in sync with defines in system.h
|
||||
@@ -464,6 +485,10 @@ typedef struct nv_state_t
|
||||
#define OS_TYPE_SUNOS 0x3
|
||||
#define OS_TYPE_VMWARE 0x4
|
||||
|
||||
#define NVFP_TYPE_NONE 0x0
|
||||
#define NVFP_TYPE_REFCOUNTED 0x1
|
||||
#define NVFP_TYPE_REGISTERED 0x2
|
||||
|
||||
struct nv_file_private_t
|
||||
{
|
||||
NvHandle *handles;
|
||||
@@ -473,6 +498,7 @@ struct nv_file_private_t
|
||||
|
||||
nv_file_private_t *ctl_nvfp;
|
||||
void *ctl_nvfp_priv;
|
||||
NvU32 register_or_refcount;
|
||||
};
|
||||
|
||||
// Forward define the gpu ops structures
|
||||
@@ -504,8 +530,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
|
||||
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
|
||||
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
|
||||
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
|
||||
/*
|
||||
* flags
|
||||
@@ -520,7 +547,7 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
|
||||
#define NV_FLAG_USES_MSIX 0x0040
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
// Unused 0x0200
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
// Unused 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
@@ -557,30 +584,21 @@ typedef enum
|
||||
NV_POWER_STATE_RUNNING
|
||||
} nv_power_state_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_GSP,
|
||||
NV_FIRMWARE_GSP_LOG
|
||||
} nv_firmware_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
|
||||
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
|
||||
#define NV_IS_SOC_DISPLAY_DEVICE(nv) \
|
||||
((nv)->flags & NV_FLAG_SOC_DISPLAY)
|
||||
|
||||
#define NV_IS_SOC_IGPU_DEVICE(nv) \
|
||||
((nv)->flags & NV_FLAG_SOC_IGPU)
|
||||
|
||||
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
|
||||
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
|
||||
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iso_iommu_present)
|
||||
|
||||
/*
|
||||
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
|
||||
* AC/DC event.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT 0x8002
|
||||
|
||||
/*
|
||||
* GPU add/remove events
|
||||
*/
|
||||
@@ -592,8 +610,6 @@ typedef enum
|
||||
* to core NVIDIA driver for ACPI events.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY 1
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED 1
|
||||
|
||||
@@ -604,14 +620,18 @@ typedef enum
|
||||
#define NV_EVAL_ACPI_METHOD_NVIF 0x01
|
||||
#define NV_EVAL_ACPI_METHOD_WMMX 0x02
|
||||
|
||||
#define NV_I2C_CMD_READ 1
|
||||
#define NV_I2C_CMD_WRITE 2
|
||||
#define NV_I2C_CMD_SMBUS_READ 3
|
||||
#define NV_I2C_CMD_SMBUS_WRITE 4
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
|
||||
typedef enum {
|
||||
NV_I2C_CMD_READ = 1,
|
||||
NV_I2C_CMD_WRITE,
|
||||
NV_I2C_CMD_SMBUS_READ,
|
||||
NV_I2C_CMD_SMBUS_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_WRITE,
|
||||
NV_I2C_CMD_BLOCK_READ,
|
||||
NV_I2C_CMD_BLOCK_WRITE
|
||||
} nv_i2c_cmd_t;
|
||||
|
||||
// Flags needed by OSAllocPagesNode
|
||||
#define NV_ALLOC_PAGES_NODE_NONE 0x0
|
||||
@@ -627,18 +647,15 @@ typedef enum
|
||||
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((offset >= nv->regs->cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
|
||||
}
|
||||
|
||||
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
|
||||
|
||||
|
||||
|
||||
return ((nv->fb) && (nv->fb->size != 0) &&
|
||||
(offset >= nv->fb->cpu_address) &&
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
|
||||
}
|
||||
|
||||
@@ -646,9 +663,7 @@ static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
|
||||
(offset >= nv->ud.cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
|
||||
}
|
||||
|
||||
@@ -657,9 +672,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
|
||||
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
|
||||
|
||||
|
||||
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
|
||||
}
|
||||
@@ -731,7 +744,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
|
||||
@@ -793,13 +806,13 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
|
||||
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
|
||||
|
||||
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
|
||||
void NV_API_CALL nv_put_firmware(const void *);
|
||||
|
||||
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
|
||||
void NV_API_CALL nv_put_file_private(void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
|
||||
@@ -834,6 +847,7 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
|
||||
int NV_API_CALL nv_cap_drv_init(void);
|
||||
void NV_API_CALL nv_cap_drv_exit(void);
|
||||
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
|
||||
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
@@ -850,11 +864,9 @@ void NV_API_CALL nv_dma_release_dma_buf (void *, nv_dma_buf_t *);
|
||||
|
||||
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
|
||||
|
||||
|
||||
NvBool NV_API_CALL nv_platform_supports_s0ix (void);
|
||||
NvBool NV_API_CALL nv_s2idle_pm_configured (void);
|
||||
|
||||
|
||||
NvBool NV_API_CALL nv_is_chassis_notebook (void);
|
||||
void NV_API_CALL nv_allow_runtime_suspend (nv_state_t *nv);
|
||||
void NV_API_CALL nv_disallow_runtime_suspend (nv_state_t *nv);
|
||||
@@ -864,45 +876,6 @@ typedef void (*nvTegraDceClientIpcCallback)(NvU32, NvU32, NvU32, void *, void *)
|
||||
NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
@@ -963,11 +936,11 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,
|
||||
|
||||
void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
|
||||
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
|
||||
|
||||
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
|
||||
@@ -990,7 +963,7 @@ NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
|
||||
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
|
||||
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);
|
||||
@@ -1016,12 +989,14 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
|
||||
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
@@ -1034,6 +1009,10 @@ NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_acces
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
|
||||
|
||||
#if defined(NV_VMWARE)
|
||||
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
|
||||
#endif
|
||||
|
||||
/* Callbacks should occur roughly every 10ms. */
|
||||
#define NV_SNAPSHOT_TIMER_HZ 100
|
||||
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
|
||||
@@ -1045,17 +1024,15 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
|
||||
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
|
||||
}
|
||||
|
||||
/* nano second resolution timer callback structure */
|
||||
typedef struct nv_nano_timer nv_nano_timer_t;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* nano timer functions */
|
||||
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
|
||||
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
|
||||
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
|
||||
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
|
||||
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -201,7 +201,8 @@ void nvUvmInterfaceAddressSpaceDestroy(uvmGpuAddressSpaceHandle vaSpace);
|
||||
and will return a unique GPU virtual address.
|
||||
|
||||
The default page size will be the small page size (as returned by query
|
||||
caps). The Alignment will also be enforced to small page size(64K/128K).
|
||||
caps). The physical alignment will also be enforced to small page
|
||||
size(64K/128K).
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - Pointer to vaSpace object
|
||||
@@ -211,15 +212,15 @@ void nvUvmInterfaceAddressSpaceDestroy(uvmGpuAddressSpaceHandle vaSpace);
|
||||
contains below given fields
|
||||
|
||||
allocInfo Members:
|
||||
rangeBegin[IN] - Allocation will be made between rangeBegin
|
||||
rangeEnd[IN] and rangeEnd(both inclusive). Default will be
|
||||
no-range limitation.
|
||||
gpuPhysOffset[OUT] - Physical offset of allocation returned only
|
||||
if contiguous allocation is requested.
|
||||
pageSize[IN] - Override the default page size (see above).
|
||||
alignment[IN] - gpuPointer GPU VA alignment. 0 means 4KB
|
||||
alignment.
|
||||
bContiguousPhysAlloc[IN] - Flag to request contiguous allocation. Default
|
||||
will follow the vidHeapControl default policy.
|
||||
bHandleProvided [IN] - Flag to signify that the client has provided
|
||||
the handle for phys allocation.
|
||||
bMemGrowsDown[IN]
|
||||
bPersistentVidmem[IN] - Allocate persistent vidmem.
|
||||
hPhysHandle[IN/OUT] - The handle will be used in allocation if provided.
|
||||
If not provided; allocator will return the handle
|
||||
it used eventually.
|
||||
@@ -247,7 +248,6 @@ NV_STATUS nvUvmInterfaceMemoryAllocFB(uvmGpuAddressSpaceHandle vaSpace,
|
||||
and will return a unique GPU virtual address.
|
||||
|
||||
The default page size will be the small page size (as returned by query caps)
|
||||
The Alignment will also be enforced to small page size.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - Pointer to vaSpace object
|
||||
@@ -257,15 +257,15 @@ NV_STATUS nvUvmInterfaceMemoryAllocFB(uvmGpuAddressSpaceHandle vaSpace,
|
||||
contains below given fields
|
||||
|
||||
allocInfo Members:
|
||||
rangeBegin[IN] - Allocation will be made between rangeBegin
|
||||
rangeEnd[IN] and rangeEnd(both inclusive). Default will be
|
||||
no-range limitation.
|
||||
gpuPhysOffset[OUT] - Physical offset of allocation returned only
|
||||
if contiguous allocation is requested.
|
||||
pageSize[IN] - Override the default page size (see above).
|
||||
alignment[IN] - gpuPointer GPU VA alignment. 0 means 4KB
|
||||
alignment.
|
||||
bContiguousPhysAlloc[IN] - Flag to request contiguous allocation. Default
|
||||
will follow the vidHeapControl default policy.
|
||||
bHandleProvided [IN] - Flag to signify that the client has provided
|
||||
the handle for phys allocation.
|
||||
bMemGrowsDown[IN]
|
||||
bPersistentVidmem[IN] - Allocate persistent vidmem.
|
||||
hPhysHandle[IN/OUT] - The handle will be used in allocation if provided.
|
||||
If not provided; allocator will return the handle
|
||||
it used eventually.
|
||||
@@ -331,10 +331,14 @@ typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
|
||||
NvU64 *pPages,
|
||||
NvU32 count,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd);
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfacePmaRegisterEvictionCallbacks
|
||||
@@ -671,14 +675,16 @@ NV_STATUS nvUvmInterfaceUnsetPageDirectory(uvmGpuAddressSpaceHandle vaSpace);
|
||||
For duplication of physical memory use nvUvmInterfaceDupMemory.
|
||||
|
||||
Arguments:
|
||||
srcVaSpace[IN] - Source VA space.
|
||||
srcAddress[IN] - GPU VA in the source VA space. The provided address
|
||||
should match one previously returned by
|
||||
nvUvmInterfaceMemoryAllocFB or
|
||||
nvUvmInterfaceMemoryAllocSys.
|
||||
dstVaSpace[IN] - Destination VA space where the new mapping will be
|
||||
created.
|
||||
dstAddress[OUT] - Pointer to the GPU VA in the destination VA space.
|
||||
srcVaSpace[IN] - Source VA space.
|
||||
srcAddress[IN] - GPU VA in the source VA space. The provided address
|
||||
should match one previously returned by
|
||||
nvUvmInterfaceMemoryAllocFB or
|
||||
nvUvmInterfaceMemoryAllocSys.
|
||||
dstVaSpace[IN] - Destination VA space where the new mapping will be
|
||||
created.
|
||||
dstVaAlignment[IN] - Alignment of the GPU VA in the destination VA
|
||||
space. 0 means 4KB alignment.
|
||||
dstAddress[OUT] - Pointer to the GPU VA in the destination VA space.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - If any of the inputs is invalid, or the source
|
||||
@@ -692,6 +698,7 @@ NV_STATUS nvUvmInterfaceUnsetPageDirectory(uvmGpuAddressSpaceHandle vaSpace);
|
||||
NV_STATUS nvUvmInterfaceDupAllocation(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
NvU64 srcAddress,
|
||||
uvmGpuAddressSpaceHandle dstVaSpace,
|
||||
NvU64 dstVaAlignment,
|
||||
NvU64 *dstAddress);
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -914,6 +921,23 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
void *pFaultBuffer,
|
||||
NvU32 *numFaults);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceFlushReplayableFaultBuffer
|
||||
|
||||
This function sends an RPC to GSP in order to flush the HW replayable fault buffer.
|
||||
|
||||
NOTES:
|
||||
- This function DOES NOT acquire the RM API or GPU locks. That is because
|
||||
it is called during fault servicing, which could produce deadlocks.
|
||||
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceInitAccessCntrInfo
|
||||
|
||||
@@ -1047,11 +1071,13 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
hMemory[IN] - Memory handle.
|
||||
offset [IN] - Offset from the beginning of the allocation
|
||||
where PTE mappings should begin.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size [IN] - Length of the allocation for which PTEs
|
||||
should be built.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size = 0 will be interpreted as the total size
|
||||
of the allocation.
|
||||
@@ -1068,10 +1094,6 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
NV_ERR_NOT_READY - Returned when querying the PTEs requires a deferred setup
|
||||
which has not yet completed. It is expected that the caller
|
||||
will reattempt the call until a different code is returned.
|
||||
|
||||
|
||||
|
||||
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvHandle hMemory,
|
||||
@@ -1260,7 +1282,7 @@ void nvUvmInterfacePagingChannelDestroy(UvmGpuPagingChannelHandle channel);
|
||||
device[IN] - device under which paging channels were allocated
|
||||
dstAddress[OUT] - a virtual address that is valid (i.e. is mapped) in
|
||||
all the paging channels allocated under the given vaSpace.
|
||||
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
|
||||
NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled.
|
||||
@@ -1361,8 +1383,6 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
a. pre-allocated stack
|
||||
b. the fact that internal RPC infrastructure doesn't acquire GPU lock.
|
||||
Therefore, locking is the caller's responsibility.
|
||||
- This function DOES NOT sleep (does not allocate memory or acquire locks)
|
||||
so it can be invoked while holding a spinlock.
|
||||
|
||||
Arguments:
|
||||
channel[IN] - paging channel handle obtained via
|
||||
@@ -1373,7 +1393,7 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
methodStreamSize[IN] - Size of methodStream, in bytes. The maximum push
|
||||
size is 128KB.
|
||||
|
||||
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
|
||||
NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled.
|
||||
@@ -1382,136 +1402,4 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif // _NV_UVM_INTERFACE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -110,7 +110,7 @@ typedef struct UvmGpuMemoryInfo_tag
|
||||
NvBool deviceDescendant;
|
||||
|
||||
// Out: Page size associated with the phys alloc.
|
||||
NvU32 pageSize;
|
||||
NvU64 pageSize;
|
||||
|
||||
// Out: Set to TRUE, if the allocation is contiguous.
|
||||
NvBool contig;
|
||||
@@ -217,12 +217,6 @@ typedef struct UvmGpuChannelInstanceInfo_tag
|
||||
// Out: Type of the engine the channel is bound to
|
||||
NvU32 channelEngineType;
|
||||
|
||||
// Out: Channel handle required to ring the doorbell
|
||||
NvU32 workSubmissionToken;
|
||||
|
||||
// Out: Address of the doorbell
|
||||
volatile NvU32 *workSubmissionOffset;
|
||||
|
||||
// Out: Channel handle to be used in the CLEAR_FAULTED method
|
||||
NvU32 clearFaultedToken;
|
||||
|
||||
@@ -231,6 +225,10 @@ typedef struct UvmGpuChannelInstanceInfo_tag
|
||||
// Ampere+ GPUs
|
||||
volatile NvU32 *pChramChannelRegister;
|
||||
|
||||
// Out: Address of the Runlist PRI Base Register required to ring the
|
||||
// doorbell after clearing the faulted bit.
|
||||
volatile NvU32 *pRunlistPRIBaseRegister;
|
||||
|
||||
// Out: SMC engine id to which the GR channel is bound, or zero if the GPU
|
||||
// does not support SMC or it is a CE channel
|
||||
NvU32 smcEngineId;
|
||||
@@ -308,6 +306,7 @@ typedef struct UvmGpuChannelAllocParams_tag
|
||||
|
||||
// interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
|
||||
} UvmGpuChannelAllocParams;
|
||||
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag
|
||||
@@ -372,10 +371,7 @@ typedef enum
|
||||
UVM_LINK_TYPE_NVLINK_1,
|
||||
UVM_LINK_TYPE_NVLINK_2,
|
||||
UVM_LINK_TYPE_NVLINK_3,
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_LINK_TYPE_NVLINK_4,
|
||||
} UVM_LINK_TYPE;
|
||||
|
||||
typedef struct UvmGpuCaps_tag
|
||||
@@ -413,7 +409,7 @@ typedef struct UvmGpuCaps_tag
|
||||
|
||||
typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
{
|
||||
NvU32 bigPageSize;
|
||||
NvU64 bigPageSize;
|
||||
|
||||
NvBool atsEnabled;
|
||||
|
||||
@@ -433,19 +429,13 @@ typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
|
||||
typedef struct UvmGpuAllocInfo_tag
|
||||
{
|
||||
NvU64 rangeBegin; // Allocation will be made between
|
||||
NvU64 rangeEnd; // rangeBegin & rangeEnd both included
|
||||
NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested
|
||||
NvU32 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 alignment; // Alignment of allocation
|
||||
NvU64 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 alignment; // Virtual alignment
|
||||
NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation
|
||||
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
|
||||
NvBool bPersistentVidmem; // Causes RM to allocate persistent video memory
|
||||
NvHandle hPhysHandle; // Handle for phys allocation either provided or retrieved
|
||||
|
||||
|
||||
|
||||
|
||||
} UvmGpuAllocInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -526,6 +516,13 @@ typedef struct UvmGpuExternalMappingInfo_tag
|
||||
// In: Size of the buffer to store PTEs (in bytes).
|
||||
NvU64 pteBufferSize;
|
||||
|
||||
// In: Page size for mapping
|
||||
// If this field is passed as 0, the page size
|
||||
// of the allocation is used for mapping.
|
||||
// nvUvmInterfaceGetExternalAllocPtes must pass
|
||||
// this field as zero.
|
||||
NvU64 mappingPageSize;
|
||||
|
||||
// In: Pointer to a buffer to store PTEs.
|
||||
// Out: The interface will fill the buffer with PTEs
|
||||
NvU64 *pteBuffer;
|
||||
@@ -576,10 +573,8 @@ typedef struct UvmPlatformInfo_tag
|
||||
// Out: ATS (Address Translation Services) is supported
|
||||
NvBool atsSupported;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
|
||||
NvBool sevEnabled;
|
||||
} UvmPlatformInfo;
|
||||
|
||||
typedef struct UvmGpuClientInfo_tag
|
||||
@@ -589,24 +584,6 @@ typedef struct UvmGpuClientInfo_tag
|
||||
NvHandle hSmcPartRef;
|
||||
} UvmGpuClientInfo;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define UVM_GPU_NAME_LENGTH 0x40
|
||||
|
||||
typedef struct UvmGpuInfo_tag
|
||||
@@ -671,10 +648,6 @@ typedef struct UvmGpuInfo_tag
|
||||
|
||||
UvmGpuClientInfo smcUserClientInfo;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@@ -717,11 +690,6 @@ typedef struct UvmPmaStatistics_tag
|
||||
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
|
||||
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
|
||||
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} UvmPmaStatistics;
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -865,10 +833,13 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
|
||||
// Preallocated stack for functions called from the UVM isr bottom half
|
||||
void *isr_bh_sp;
|
||||
|
||||
} nonReplayable;
|
||||
NvHandle faultBufferHandle;
|
||||
} UvmGpuFaultInfo;
|
||||
|
||||
struct Device;
|
||||
|
||||
typedef struct UvmGpuPagingChannel_tag
|
||||
{
|
||||
struct gpuDevice *device;
|
||||
@@ -876,6 +847,7 @@ typedef struct UvmGpuPagingChannel_tag
|
||||
NvHandle channelHandle;
|
||||
NvHandle errorNotifierHandle;
|
||||
void *pushStreamSp;
|
||||
struct Device *pDevice;
|
||||
} UvmGpuPagingChannel, *UvmGpuPagingChannelHandle;
|
||||
|
||||
typedef struct UvmGpuAccessCntrInfo_tag
|
||||
@@ -936,6 +908,16 @@ typedef struct UvmGpuAccessCntrConfig_tag
|
||||
NvU32 threshold;
|
||||
} UvmGpuAccessCntrConfig;
|
||||
|
||||
//
|
||||
// When modifying this enum, make sure they are compatible with the mirrored
|
||||
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
|
||||
//
|
||||
typedef enum UvmPmaGpuMemoryType_tag
|
||||
{
|
||||
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
|
||||
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
|
||||
} UVM_PMA_GPU_MEMORY_TYPE;
|
||||
|
||||
typedef UvmGpuChannelInfo gpuChannelInfo;
|
||||
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
|
||||
typedef UvmGpuCaps gpuCaps;
|
||||
@@ -961,10 +943,4 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
|
||||
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
|
||||
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif // _NV_UVM_TYPES_H_
|
||||
|
||||
@@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|
||||
|* *|
|
||||
\***************************************************************************/
|
||||
|
||||
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
|
||||
typedef NvU64 NvOffset; /* GPU address */
|
||||
#endif
|
||||
|
||||
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
|
||||
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <nvlimits.h>
|
||||
|
||||
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
|
||||
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
|
||||
|
||||
#define NVKMS_LEFT 0
|
||||
#define NVKMS_RIGHT 1
|
||||
@@ -530,4 +531,78 @@ typedef struct {
|
||||
NvBool noncoherent;
|
||||
} NvKmsDispIOCoherencyModes;
|
||||
|
||||
enum NvKmsInputColorSpace {
|
||||
/* Unknown colorspace; no de-gamma will be applied */
|
||||
NVKMS_INPUT_COLORSPACE_NONE = 0,
|
||||
|
||||
/* Linear, Rec.709 [-0.5, 7.5) */
|
||||
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
|
||||
|
||||
/* PQ, Rec.2020 unity */
|
||||
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
|
||||
};
|
||||
|
||||
enum NvKmsOutputTf {
|
||||
/*
|
||||
* NVKMS itself won't apply any OETF (clients are still
|
||||
* free to provide a custom OLUT)
|
||||
*/
|
||||
NVKMS_OUTPUT_TF_NONE = 0,
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
|
||||
NVKMS_OUTPUT_TF_PQ = 2,
|
||||
};
|
||||
|
||||
/*!
|
||||
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
|
||||
* This is expected to match exactly with the spec.
|
||||
*/
|
||||
struct NvKmsHDRStaticMetadata {
|
||||
/*!
|
||||
* Color primaries of the data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} displayPrimaries[3];
|
||||
|
||||
/*!
|
||||
* White point of colorspace data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} whitePoint;
|
||||
|
||||
/**
|
||||
* Maximum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Minimum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of
|
||||
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
|
||||
* represents 6.5535 cd/m2.
|
||||
*/
|
||||
NvU16 minDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Maximum content light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxCLL;
|
||||
|
||||
/*!
|
||||
* Maximum frame-average light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxFALL;
|
||||
};
|
||||
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
||||
@@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
|
||||
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
|
||||
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
|
||||
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
|
||||
};
|
||||
|
||||
typedef struct NvKmsSurfaceMemoryFormatInfo {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
} caps;
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@@ -218,6 +219,11 @@ struct NvKmsKapiLayerConfig {
|
||||
struct NvKmsRRParams rrParams;
|
||||
struct NvKmsKapiSyncpt syncptParams;
|
||||
|
||||
struct NvKmsHDRStaticMetadata hdrMetadata;
|
||||
NvBool hdrMetadataSpecified;
|
||||
|
||||
enum NvKmsOutputTf tf;
|
||||
|
||||
NvU8 minPresentInterval;
|
||||
NvBool tearing;
|
||||
|
||||
@@ -226,6 +232,8 @@ struct NvKmsKapiLayerConfig {
|
||||
|
||||
NvS16 dstX, dstY;
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
@@ -277,6 +285,8 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
struct NvKmsKapiDisplayMode mode;
|
||||
|
||||
NvBool vrrEnabled;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
@@ -368,6 +378,9 @@ struct NvKmsKapiDynamicDisplayParams {
|
||||
/* [OUT] Connection status */
|
||||
NvU32 connected;
|
||||
|
||||
/* [OUT] VRR status */
|
||||
NvBool vrrSupported;
|
||||
|
||||
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
|
||||
struct {
|
||||
NvU16 bufferSize;
|
||||
@@ -416,6 +429,12 @@ struct NvKmsKapiCreateSurfaceParams {
|
||||
NvU8 log2GobsPerBlockY;
|
||||
};
|
||||
|
||||
enum NvKmsKapiAllocationType {
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT = 0,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_NOTIFIER = 1,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN = 2,
|
||||
};
|
||||
|
||||
struct NvKmsKapiFunctionsTable {
|
||||
|
||||
/*!
|
||||
@@ -478,6 +497,47 @@ struct NvKmsKapiFunctionsTable {
|
||||
*/
|
||||
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] fd fd from opening /dev/nvidia-modeset.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to grant.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*grantPermissions)
|
||||
(
|
||||
NvS32 fd,
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Revoke permissions previously granted. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to revoke.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*revokePermissions)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Registers for notification, via
|
||||
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified
|
||||
@@ -609,6 +669,8 @@ struct NvKmsKapiFunctionsTable {
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] layout BlockLinear or Pitch.
|
||||
*
|
||||
* \param [in] type Allocation type.
|
||||
*
|
||||
* \param [in] size Size, in bytes, of the memory to allocate.
|
||||
*
|
||||
@@ -624,6 +686,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
enum NvKmsSurfaceMemoryLayout layout,
|
||||
enum NvKmsKapiAllocationType type,
|
||||
NvU64 size,
|
||||
NvU8 *compressible
|
||||
);
|
||||
@@ -637,6 +700,8 @@ struct NvKmsKapiFunctionsTable {
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] layout BlockLinear or Pitch.
|
||||
*
|
||||
* \param [in] type Allocation type.
|
||||
*
|
||||
* \param [in] size Size, in bytes, of the memory to allocate.
|
||||
*
|
||||
@@ -652,6 +717,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
enum NvKmsSurfaceMemoryLayout layout,
|
||||
enum NvKmsKapiAllocationType type,
|
||||
NvU64 size,
|
||||
NvU8 *compressible
|
||||
);
|
||||
|
||||
@@ -31,13 +31,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* This is the maximum number of GPUs supported in a single system.
|
||||
*/
|
||||
|
||||
@@ -234,12 +234,14 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
|
||||
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
|
||||
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
|
||||
#else
|
||||
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
|
||||
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
|
||||
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
|
||||
#endif
|
||||
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
@@ -249,12 +251,12 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (1?drf) // much better
|
||||
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
|
||||
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
|
||||
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#endif
|
||||
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
|
||||
#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
|
||||
#endif
|
||||
@@ -907,6 +909,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
return uAddr.p;
|
||||
}
|
||||
|
||||
// Get bit at pos (k) from x
|
||||
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
|
||||
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
|
||||
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
|
||||
//
|
||||
// Set the bit at pos (b) for U64 which is < 128. Since the (b) can be >= 64, we need 2 U64 to store this.
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUS_H
|
||||
#define SDK_NVSTATUS_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUS_H */
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUSCODES_H
|
||||
#define SDK_NVSTATUSCODES_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
|
||||
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
|
||||
|
||||
@@ -153,6 +148,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
@@ -164,6 +160,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
|
||||
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
|
||||
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUSCODES_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -125,6 +125,7 @@ NvU32 NV_API_CALL os_get_cpu_number (void);
|
||||
void NV_API_CALL os_disable_console_access (void);
|
||||
void NV_API_CALL os_enable_console_access (void);
|
||||
NV_STATUS NV_API_CALL os_registry_init (void);
|
||||
NvU64 NV_API_CALL os_get_max_user_va (void);
|
||||
NV_STATUS NV_API_CALL os_schedule (void);
|
||||
NV_STATUS NV_API_CALL os_alloc_spinlock (void **);
|
||||
void NV_API_CALL os_free_spinlock (void *);
|
||||
@@ -142,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_release_semaphore (void *);
|
||||
void* NV_API_CALL os_alloc_rwlock (void);
|
||||
void NV_API_CALL os_free_rwlock (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
|
||||
void NV_API_CALL os_release_rwlock_read (void *);
|
||||
void NV_API_CALL os_release_rwlock_write (void *);
|
||||
NvBool NV_API_CALL os_semaphore_may_sleep (void);
|
||||
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
|
||||
NvBool NV_API_CALL os_is_isr (void);
|
||||
@@ -172,7 +181,6 @@ NV_STATUS NV_API_CALL os_put_page (NvU64 address);
|
||||
NvU32 NV_API_CALL os_get_page_refcount (NvU64 address);
|
||||
NvU32 NV_API_CALL os_count_tail_pages (NvU64 address);
|
||||
void NV_API_CALL os_free_pages_phys (NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_call_nv_vmbus (NvU32, void *);
|
||||
NV_STATUS NV_API_CALL os_open_temporary_file (void **);
|
||||
void NV_API_CALL os_close_file (void *);
|
||||
NV_STATUS NV_API_CALL os_write_file (void *, NvU8 *, NvU64, NvU64);
|
||||
@@ -193,19 +201,12 @@ void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
|
||||
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
|
||||
void NV_API_CALL os_nv_cap_close_fd (int);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
|
||||
OS_INTF_PCIE_REQ_ATOMICS_64BIT,
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
@@ -245,11 +246,4 @@ int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...);
|
||||
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_NO 0x00000000
|
||||
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_YES 0x00000001
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* OS_INTERFACE_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -63,7 +63,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_query_ces_caps(nvidia_stack_t *sp, nvgpuDeviceHandle_t, nvgpuCesCaps_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_gpu_info(nvidia_stack_t *, const NvProcessorUuid *pUuid, const nvgpuClientInfo_t *, nvgpuInfo_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_service_device_interrupts_rm(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_allocation(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuAddressSpaceHandle_t, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_allocation(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuAddressSpaceHandle_t, NvU64, NvU64 *);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_memory (nvidia_stack_t *, nvgpuDeviceHandle_t, NvHandle, NvHandle, NvHandle *, nvgpuMemoryInfo_t);
|
||||
|
||||
@@ -74,6 +74,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
@@ -98,13 +99,4 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
|
||||
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
25
kernel-open/count-lines.mk
Normal file
25
kernel-open/count-lines.mk
Normal file
@@ -0,0 +1,25 @@
|
||||
count:
|
||||
@echo "conftests:$(words $(ALL_CONFTESTS))" \
|
||||
"objects:$(words $(NV_OBJECTS_DEPEND_ON_CONFTEST))" \
|
||||
"modules:$(words $(NV_KERNEL_MODULES))"
|
||||
|
||||
.PHONY: count
|
||||
|
||||
# Include the top-level makefile to get $(NV_KERNEL_MODULES)
|
||||
include Makefile
|
||||
|
||||
# Set $(src) for the to-be-included nvidia*.Kbuild files
|
||||
src := $(CURDIR)
|
||||
|
||||
# Include nvidia*.Kbuild and append the nvidia*-y objects to ALL_OBJECTS
|
||||
$(foreach _module, $(NV_KERNEL_MODULES), \
|
||||
$(eval include $(_module)/$(_module).Kbuild) \
|
||||
)
|
||||
|
||||
# Concatenate all of the conftest lists; use $(sort ) to remove duplicates
|
||||
ALL_CONFTESTS := $(sort $(NV_CONFTEST_FUNCTION_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_GENERIC_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_MACRO_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_SYMBOL_COMPILE_TESTS) \
|
||||
$(NV_CONFTEST_TYPE_COMPILE_TESTS) \
|
||||
)
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-utils.h"
|
||||
#include "nvidia-drm-encoder.h"
|
||||
|
||||
@@ -42,6 +43,7 @@
|
||||
|
||||
#include <drm/drm_atomic.h>
|
||||
#include <drm/drm_atomic_helper.h>
|
||||
#include <drm/drm_edid.h>
|
||||
|
||||
static void nv_drm_connector_destroy(struct drm_connector *connector)
|
||||
{
|
||||
@@ -98,7 +100,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
break;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_OVERRIDE_EDID)
|
||||
if (connector->override_edid) {
|
||||
#else
|
||||
if (drm_edid_override_connector_update(connector) > 0) {
|
||||
#endif
|
||||
const struct drm_property_blob *edid = connector->edid_blob_ptr;
|
||||
|
||||
if (edid->length <= sizeof(pDetectParams->edid.buffer)) {
|
||||
@@ -118,6 +124,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
|
||||
drm_connector_attach_vrr_capable_property(&nv_connector->base);
|
||||
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
|
||||
#endif
|
||||
|
||||
if (pDetectParams->connected) {
|
||||
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {
|
||||
|
||||
@@ -197,6 +208,11 @@ done:
|
||||
|
||||
nv_drm_free(pDetectParams);
|
||||
|
||||
if (status == connector_status_disconnected &&
|
||||
nv_connector->modeset_permission_filep) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -362,6 +378,8 @@ nv_drm_connector_new(struct drm_device *dev,
|
||||
nv_connector->physicalIndex = physicalIndex;
|
||||
nv_connector->type = type;
|
||||
nv_connector->internal = internal;
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
|
||||
strcpy(nv_connector->dpAddress, dpAddress);
|
||||
|
||||
@@ -464,4 +482,26 @@ done:
|
||||
return connector;
|
||||
}
|
||||
|
||||
/*
|
||||
* Revoke the permissions on this connector.
|
||||
*/
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector* nv_connector)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
bool ret = true;
|
||||
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -51,6 +51,20 @@ struct nv_drm_connector {
|
||||
|
||||
atomic_t connection_status_dirty;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
/**
|
||||
* @modeset_permission_crtc:
|
||||
*
|
||||
* The crtc using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct nv_drm_crtc *modeset_permission_crtc;
|
||||
|
||||
struct drm_connector base;
|
||||
};
|
||||
|
||||
@@ -84,6 +98,9 @@ nv_drm_get_connector(struct drm_device *dev,
|
||||
NvBool internal,
|
||||
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH]);
|
||||
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector *nv_connector);
|
||||
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#endif /* __NVIDIA_DRM_CONNECTOR_H__ */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -46,6 +46,35 @@
|
||||
#include <linux/nvhost.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
static int
|
||||
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
|
||||
struct drm_property_blob **blob,
|
||||
uint64_t blob_id,
|
||||
ssize_t expected_size)
|
||||
{
|
||||
struct drm_property_blob *new_blob = NULL;
|
||||
|
||||
if (blob_id != 0) {
|
||||
new_blob = drm_property_lookup_blob(dev, blob_id);
|
||||
if (new_blob == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((expected_size > 0) &&
|
||||
(new_blob->length != expected_size)) {
|
||||
drm_property_blob_put(new_blob);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
drm_property_replace_blob(blob, new_blob);
|
||||
drm_property_blob_put(new_blob);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nv_drm_plane_destroy(struct drm_plane *plane)
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
@@ -84,9 +113,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
cursor_req_config_disable(req_config);
|
||||
@@ -186,7 +212,6 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
int ret = 0;
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
plane_req_config_disable(req_config);
|
||||
@@ -309,6 +334,9 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
nv_plane->defaultCompositionMode;
|
||||
#endif
|
||||
|
||||
req_config->config.inputColorSpace =
|
||||
nv_drm_plane_state->input_colorspace;
|
||||
|
||||
req_config->config.syncptParams.preSyncptSpecified = false;
|
||||
req_config->config.syncptParams.postSyncptRequested = false;
|
||||
|
||||
@@ -320,10 +348,10 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
int ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -339,6 +367,60 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
|
||||
struct hdr_output_metadata *hdr_metadata =
|
||||
nv_drm_plane_state->hdr_output_metadata->data;
|
||||
struct hdr_metadata_infoframe *info_frame =
|
||||
&hdr_metadata->hdmi_metadata_type1;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
uint32_t i;
|
||||
|
||||
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].x =
|
||||
info_frame->display_primaries[i].x;
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].y =
|
||||
info_frame->display_primaries[i].y;
|
||||
}
|
||||
|
||||
req_config->config.hdrMetadata.whitePoint.x =
|
||||
info_frame->white_point.x;
|
||||
req_config->config.hdrMetadata.whitePoint.y =
|
||||
info_frame->white_point.y;
|
||||
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
|
||||
info_frame->max_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
|
||||
info_frame->min_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.maxCLL =
|
||||
info_frame->max_cll;
|
||||
req_config->config.hdrMetadata.maxFALL =
|
||||
info_frame->max_fall;
|
||||
|
||||
req_config->config.hdrMetadataSpecified = true;
|
||||
|
||||
switch (info_frame->eotf) {
|
||||
case HDMI_EOTF_SMPTE_ST2084:
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
|
||||
break;
|
||||
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
|
||||
req_config->config.tf =
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
|
||||
break;
|
||||
default:
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
req_config->config.hdrMetadataSpecified = false;
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Unconditionally mark the surface as changed, even if nothing changed,
|
||||
* so that we always get a flip event: a DRM client may flip with
|
||||
@@ -509,9 +591,21 @@ static int nv_drm_plane_atomic_set_property(
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
return nv_drm_atomic_replace_property_blob_from_id(
|
||||
nv_dev->dev,
|
||||
&nv_drm_plane_state->hdr_output_metadata,
|
||||
val,
|
||||
sizeof(struct hdr_output_metadata));
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int nv_drm_plane_atomic_get_property(
|
||||
@@ -521,12 +615,26 @@ static int nv_drm_plane_atomic_get_property(
|
||||
uint64_t *val)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
*val = nv_drm_plane_state->input_colorspace;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
*val = nv_drm_plane_state->hdr_output_metadata ?
|
||||
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct drm_plane_state *
|
||||
@@ -544,6 +652,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
|
||||
|
||||
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
|
||||
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
|
||||
if (nv_plane_state->hdr_output_metadata) {
|
||||
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
|
||||
}
|
||||
#endif
|
||||
|
||||
return &nv_plane_state->base;
|
||||
}
|
||||
@@ -557,6 +673,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
#else
|
||||
__drm_atomic_helper_plane_destroy_state(state);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(state);
|
||||
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void nv_drm_plane_atomic_destroy_state(
|
||||
@@ -803,7 +925,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
|
||||
};
|
||||
|
||||
static void nv_drm_plane_install_properties(
|
||||
struct drm_plane *plane)
|
||||
struct drm_plane *plane,
|
||||
NvBool supportsHDR)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
|
||||
@@ -811,6 +934,19 @@ static void nv_drm_plane_install_properties(
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_out_fence_property, 0);
|
||||
}
|
||||
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -990,7 +1126,9 @@ nv_drm_plane_create(struct drm_device *dev,
|
||||
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
|
||||
|
||||
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
|
||||
nv_drm_plane_install_properties(plane);
|
||||
nv_drm_plane_install_properties(
|
||||
plane,
|
||||
pResInfo->supportsHDR[layer_idx]);
|
||||
}
|
||||
|
||||
__nv_drm_plane_create_alpha_blending_properties(
|
||||
@@ -1043,6 +1181,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
|
||||
nv_crtc->head = head;
|
||||
INIT_LIST_HEAD(&nv_crtc->flip_list);
|
||||
spin_lock_init(&nv_crtc->flip_list_lock);
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
|
||||
ret = drm_crtc_init_with_planes(nv_dev->dev,
|
||||
&nv_crtc->base,
|
||||
@@ -1141,11 +1280,13 @@ void nv_drm_enumerate_crtcs_and_planes(
|
||||
}
|
||||
|
||||
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
|
||||
struct drm_plane *overlay_plane = NULL;
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
|
||||
continue;
|
||||
}
|
||||
|
||||
struct drm_plane *overlay_plane =
|
||||
overlay_plane =
|
||||
nv_drm_plane_create(nv_dev->dev,
|
||||
DRM_PLANE_TYPE_OVERLAY,
|
||||
layer,
|
||||
@@ -1189,7 +1330,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1217,7 +1358,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -35,38 +35,9 @@
|
||||
|
||||
#include <drm/drm_crtc.h>
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvkms-kapi.h"
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_MODE_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
struct nv_drm_crtc {
|
||||
NvU32 head;
|
||||
|
||||
@@ -85,6 +56,13 @@ struct nv_drm_crtc {
|
||||
*/
|
||||
spinlock_t flip_list_lock;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this crtc with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
struct drm_crtc base;
|
||||
};
|
||||
|
||||
@@ -205,6 +183,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
enum NvKmsInputColorSpace input_colorspace;
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
@@ -212,6 +194,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
|
||||
return container_of(state, struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
|
||||
{
|
||||
return container_of(state, const struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
|
||||
{
|
||||
if (crtc == NULL) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -30,7 +30,7 @@
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@@ -86,6 +86,23 @@
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
|
||||
enum NvKmsInputColorSpace colorSpace)
|
||||
{
|
||||
switch (colorSpace) {
|
||||
case NVKMS_INPUT_COLORSPACE_NONE:
|
||||
return "None";
|
||||
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
|
||||
return "IEC 61966-2-2 linear FP";
|
||||
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
|
||||
return "ITU-R BT.2100-PQ YCbCr";
|
||||
default:
|
||||
/* We shoudn't hit this */
|
||||
WARN_ON("Unsupported input colorspace");
|
||||
return "None";
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
@@ -240,10 +257,6 @@ nv_drm_init_mode_config(struct nv_drm_device *nv_dev,
|
||||
dev->mode_config.preferred_depth = 24;
|
||||
dev->mode_config.prefer_shadow = 1;
|
||||
|
||||
/* Currently unused. Update when needed. */
|
||||
|
||||
dev->mode_config.fb_base = 0;
|
||||
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_ASYNC_FLIP) || \
|
||||
defined(NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS)
|
||||
dev->mode_config.async_page_flip = true;
|
||||
@@ -332,6 +345,15 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
@@ -345,6 +367,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_dev->nv_hdr_output_metadata_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_HDR_STATIC_METADATA", 0);
|
||||
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -667,6 +706,16 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
/* check the pDevice since this only gets set if modeset = 1
|
||||
* which is a requirement for the dma_buf extension to work
|
||||
*/
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
return nv_dev->pDevice ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static
|
||||
int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
@@ -696,6 +745,455 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
|
||||
NvU32 dpyId)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
return nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->hDisplay == dpyId;
|
||||
}
|
||||
|
||||
static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
|
||||
// Importantly, drm_connector_lookup (with filep) will only return the
|
||||
// connector if we are master, a lessee with the connector, or not master at
|
||||
// all. It will return NULL if we are a lessee with other connectors.
|
||||
struct drm_connector *connector =
|
||||
nv_drm_connector_lookup(dev, filep, params->connectorId);
|
||||
struct nv_drm_connector *nv_connector;
|
||||
int ret = 0;
|
||||
|
||||
if (!connector) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nv_connector = to_nv_connector(connector);
|
||||
if (!nv_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nv_connector->nv_detected_encoder) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
|
||||
|
||||
done:
|
||||
nv_drm_connector_put(connector);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
|
||||
struct drm_connector *connector;
|
||||
int ret = -EINVAL;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
|
||||
/* Lookup for existing connector with same dpyId */
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
params->connectorId = connector->base.id;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static NvU32 nv_drm_get_head_bit_from_connector(struct drm_connector *connector)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
|
||||
if (connector->state && connector->state->crtc) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(connector->state->crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
} else if (nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->base.crtc) {
|
||||
struct nv_drm_crtc *nv_crtc =
|
||||
to_nv_crtc(nv_connector->nv_detected_encoder->base.crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_grant_permissions_params *params = data;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct nv_drm_connector *target_nv_connector = NULL;
|
||||
struct nv_drm_crtc *target_nv_crtc = NULL;
|
||||
struct drm_connector *connector, *target_connector = NULL;
|
||||
struct drm_crtc *crtc;
|
||||
NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/* Get the connector for the dpyId. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
target_connector =
|
||||
nv_drm_connector_lookup(dev, filep, connector->base.id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
// Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
|
||||
// return the object if we are master, a lessee with the object, or not
|
||||
// master at all. It will return NULL if we are a lessee with other objects.
|
||||
if (!target_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
target_nv_connector = to_nv_connector(target_connector);
|
||||
possible_crtcs =
|
||||
target_nv_connector->nv_detected_encoder->base.possible_crtcs;
|
||||
|
||||
/* Target connector must not be previously granted. */
|
||||
if (target_nv_connector->modeset_permission_filep) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Add all heads that are owned and not already granted. */
|
||||
freeHeadBits = 0;
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
|
||||
!nv_crtc->modeset_permission_filep &&
|
||||
(drm_crtc_mask(crtc) & possible_crtcs)) {
|
||||
freeHeadBits |= NVBIT(nv_crtc->head);
|
||||
}
|
||||
}
|
||||
|
||||
targetHeadBit = nv_drm_get_head_bit_from_connector(target_connector);
|
||||
if (targetHeadBit & freeHeadBits) {
|
||||
/* If a crtc is already being used by this connector, use it. */
|
||||
freeHeadBits = targetHeadBit;
|
||||
} else {
|
||||
/* Otherwise, remove heads that are in use by other connectors. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Fail if no heads are available. */
|
||||
if (!freeHeadBits) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop through the crtc again and find a matching head.
|
||||
* Record the filep that is using the crtc and the connector.
|
||||
*/
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (freeHeadBits & NVBIT(nv_crtc->head)) {
|
||||
target_nv_crtc = nv_crtc;
|
||||
head = nv_crtc->head;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nvKms->grantPermissions(params->fd, nv_dev->pDevice, head,
|
||||
params->dpyId)) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
target_nv_connector->modeset_permission_crtc = target_nv_crtc;
|
||||
target_nv_connector->modeset_permission_filep = filep;
|
||||
target_nv_crtc->modeset_permission_filep = filep;
|
||||
|
||||
done:
|
||||
if (target_connector) {
|
||||
nv_drm_connector_put(target_connector);
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool nv_drm_revoke_connector(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_connector *nv_connector)
|
||||
{
|
||||
bool ret = true;
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission(struct drm_device *dev,
|
||||
struct drm_file *filep, NvU32 dpyId)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If dpyId is set, only revoke those specific resources. Otherwise,
|
||||
* it is from closing the file so revoke all resources for that filep.
|
||||
*/
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep == filep &&
|
||||
(!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
|
||||
if (!nv_drm_connector_revoke_permissions(dev, nv_connector)) {
|
||||
ret = -EINVAL;
|
||||
// Continue trying to revoke as much as possible.
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep == filep && !dpyId) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_revoke_permissions_params *params = data;
|
||||
if (!params->dpyId) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return nv_drm_revoke_permission(dev, filep, params->dpyId);
|
||||
}
|
||||
|
||||
static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
|
||||
{
|
||||
/*
|
||||
* Some systems like android can reach here without initializing the
|
||||
* device, so check for that.
|
||||
*/
|
||||
if (dev->mode_config.num_crtc > 0 &&
|
||||
dev->mode_config.crtc_list.next != NULL &&
|
||||
dev->mode_config.crtc_list.prev != NULL &&
|
||||
dev->mode_config.num_connector > 0 &&
|
||||
dev->mode_config.connector_list.next != NULL &&
|
||||
dev->mode_config.connector_list.prev != NULL) {
|
||||
nv_drm_revoke_permission(dev, filep, 0);
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
|
||||
int lessee_id)
|
||||
{
|
||||
int object;
|
||||
void *entry;
|
||||
|
||||
while (master->lessor != NULL) {
|
||||
master = master->lessor;
|
||||
}
|
||||
|
||||
idr_for_each_entry(&master->lessee_idr, entry, object)
|
||||
{
|
||||
if (object == lessee_id) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void nv_drm_get_revoked_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, unsigned int cmd,
|
||||
unsigned long arg, int **objects,
|
||||
int *objects_count)
|
||||
{
|
||||
unsigned int ioc_size;
|
||||
struct drm_mode_revoke_lease revoke_lease;
|
||||
struct drm_master *lessor, *lessee;
|
||||
void *entry;
|
||||
int *objs;
|
||||
int obj, obj_count, obj_i;
|
||||
|
||||
ioc_size = _IOC_SIZE(cmd);
|
||||
if (ioc_size > sizeof(revoke_lease)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (copy_from_user(&revoke_lease, (void __user *)arg, ioc_size) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
lessor = nv_drm_file_get_master(filep);
|
||||
if (lessor == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->mode_config.idr_mutex);
|
||||
lessee = nv_drm_find_lessee(lessor, revoke_lease.lessee_id);
|
||||
|
||||
if (lessee == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_count = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
++obj_count;
|
||||
}
|
||||
if (obj_count == 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
objs = nv_drm_calloc(obj_count, sizeof(int));
|
||||
if (objs == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_i = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
objs[obj_i++] = obj;
|
||||
}
|
||||
*objects = objs;
|
||||
*objects_count = obj_count;
|
||||
|
||||
done:
|
||||
mutex_unlock(&dev->mode_config.idr_mutex);
|
||||
drm_master_put(&lessor);
|
||||
}
|
||||
|
||||
static bool nv_drm_is_in_objects(int object, int *objects, int objects_count)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < objects_count; ++i) {
|
||||
if (objects[i] == object) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void nv_drm_finish_revoking_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, int *objects,
|
||||
int objects_count)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
int ret = 0;
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(crtc->base.id, objects, objects_count)) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
}
|
||||
#endif /* NV_DRM_MASTER_HAS_LEASES */
|
||||
|
||||
#if defined(NV_DRM_BUS_PRESENT)
|
||||
|
||||
#if defined(NV_DRM_BUS_HAS_GET_IRQ)
|
||||
@@ -727,12 +1225,50 @@ static struct drm_bus nv_drm_bus = {
|
||||
|
||||
#endif /* NV_DRM_BUS_PRESENT */
|
||||
|
||||
/*
|
||||
* Wrapper around drm_ioctl to hook in to upstream ioctl.
|
||||
*
|
||||
* Currently used to add additional handling to REVOKE_LEASE.
|
||||
*/
|
||||
static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
long retcode;
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
struct drm_file *file_priv = filp->private_data;
|
||||
struct drm_device *dev = file_priv->minor->dev;
|
||||
int *objects = NULL;
|
||||
int objects_count = 0;
|
||||
|
||||
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE) {
|
||||
// Save the revoked objects before revoking.
|
||||
nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
|
||||
&objects_count);
|
||||
}
|
||||
#endif
|
||||
|
||||
retcode = drm_ioctl(filp, cmd, arg);
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
|
||||
if (retcode == 0) {
|
||||
// If revoking was successful, finish revoking the objects.
|
||||
nv_drm_finish_revoking_objects(dev, file_priv, objects,
|
||||
objects_count);
|
||||
}
|
||||
nv_drm_free(objects);
|
||||
}
|
||||
#endif
|
||||
|
||||
return retcode;
|
||||
}
|
||||
|
||||
static const struct file_operations nv_drm_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
|
||||
.open = drm_open,
|
||||
.release = drm_release,
|
||||
.unlocked_ioctl = drm_ioctl,
|
||||
.unlocked_ioctl = nv_drm_ioctl,
|
||||
#if defined(CONFIG_COMPAT)
|
||||
.compat_ioctl = drm_compat_ioctl,
|
||||
#endif
|
||||
@@ -768,11 +1304,11 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
|
||||
nv_drm_fence_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_fence_context_create_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_PRIME_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_prime_fence_context_create_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_FENCE_ATTACH,
|
||||
nv_drm_gem_fence_attach_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_PRIME_FENCE_ATTACH,
|
||||
nv_drm_gem_prime_fence_attach_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
#endif
|
||||
|
||||
@@ -798,6 +1334,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IDENTIFY_OBJECT,
|
||||
nv_drm_gem_identify_object_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_DMABUF_SUPPORTED,
|
||||
nv_drm_dmabuf_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID,
|
||||
nv_drm_get_dpy_id_for_connector_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID,
|
||||
nv_drm_get_connector_id_for_dpy_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GRANT_PERMISSIONS,
|
||||
nv_drm_grant_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
|
||||
nv_drm_revoke_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
};
|
||||
|
||||
@@ -840,6 +1391,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
|
||||
.load = nv_drm_load,
|
||||
.unload = nv_drm_unload,
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
.postclose = nv_drm_postclose,
|
||||
#endif
|
||||
|
||||
.fops = &nv_drm_fops,
|
||||
|
||||
|
||||
@@ -31,17 +31,28 @@
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-dma-resv-helper.h"
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
|
||||
#include "nvidia-dma-fence-helper.h"
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
struct nv_drm_device *nv_dev;
|
||||
struct nv_drm_fence_context;
|
||||
|
||||
struct nv_drm_fence_context_ops {
|
||||
void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
|
||||
};
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
const struct nv_drm_fence_context_ops *ops;
|
||||
|
||||
struct nv_drm_device *nv_dev;
|
||||
uint32_t context;
|
||||
};
|
||||
|
||||
struct nv_drm_prime_fence_context {
|
||||
struct nv_drm_fence_context base;
|
||||
|
||||
NvU64 fenceSemIndex; /* Index into semaphore surface */
|
||||
|
||||
@@ -53,10 +64,10 @@ struct nv_drm_fence_context {
|
||||
spinlock_t lock;
|
||||
|
||||
/*
|
||||
* Software signaling structures. __nv_drm_fence_context_new()
|
||||
* allocates channel event and __nv_drm_fence_context_destroy() frees it.
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
* Software signaling structures. __nv_drm_prime_fence_context_new()
|
||||
* allocates channel event and __nv_drm_prime_fence_context_destroy() frees
|
||||
* it. There are no simultaneous read/write access to 'cb', therefore it
|
||||
* does not require spin-lock protection.
|
||||
*/
|
||||
struct NvKmsKapiChannelEvent *cb;
|
||||
|
||||
@@ -79,7 +90,7 @@ struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
|
||||
}
|
||||
|
||||
static const char*
|
||||
nv_drm_gem_prime_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
{
|
||||
return "NVIDIA";
|
||||
}
|
||||
@@ -122,7 +133,7 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
|
||||
}
|
||||
|
||||
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
|
||||
.get_driver_name = nv_drm_gem_prime_fence_op_get_driver_name,
|
||||
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
|
||||
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
|
||||
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
|
||||
.release = nv_drm_gem_prime_fence_op_release,
|
||||
@@ -138,7 +149,7 @@ __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
|
||||
}
|
||||
|
||||
static void nv_drm_gem_prime_force_fence_signal(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
struct nv_drm_prime_fence_context *nv_fence_context)
|
||||
{
|
||||
WARN_ON(!spin_is_locked(&nv_fence_context->lock));
|
||||
|
||||
@@ -158,7 +169,7 @@ static void nv_drm_gem_prime_fence_event
|
||||
NvU32 dataU32
|
||||
)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context = dataPtr;
|
||||
struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
@@ -187,11 +198,53 @@ static void nv_drm_gem_prime_fence_event
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_fence_context_create_params *p)
|
||||
static inline struct nv_drm_prime_fence_context*
|
||||
to_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
|
||||
return (struct nv_drm_prime_fence_context *)nv_fence_context;
|
||||
}
|
||||
|
||||
static void __nv_drm_prime_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context;
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
to_prime_fence_context(nv_fence_context);
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_prime_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_prime_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
|
||||
.destroy = __nv_drm_prime_fence_context_destroy,
|
||||
};
|
||||
|
||||
static inline struct nv_drm_prime_fence_context *
|
||||
__nv_drm_prime_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_prime_fence_context_create_params *p)
|
||||
{
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context;
|
||||
struct NvKmsKapiMemory *pSemSurface;
|
||||
NvU32 *pLinearAddress;
|
||||
|
||||
@@ -225,9 +278,9 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* event for it.
|
||||
*/
|
||||
|
||||
if ((nv_fence_context = nv_drm_calloc(
|
||||
if ((nv_prime_fence_context = nv_drm_calloc(
|
||||
1,
|
||||
sizeof(*nv_fence_context))) == NULL) {
|
||||
sizeof(*nv_prime_fence_context))) == NULL) {
|
||||
goto failed_alloc_fence_context;
|
||||
}
|
||||
|
||||
@@ -236,17 +289,18 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*nv_fence_context = (struct nv_drm_fence_context) {
|
||||
.nv_dev = nv_dev,
|
||||
.context = nv_dma_fence_context_alloc(1),
|
||||
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
|
||||
.base.ops = &nv_drm_prime_fence_context_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.pSemSurface = pSemSurface,
|
||||
.pLinearAddress = pLinearAddress,
|
||||
.fenceSemIndex = p->index,
|
||||
};
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence_context->pending);
|
||||
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
|
||||
|
||||
spin_lock_init(&nv_fence_context->lock);
|
||||
spin_lock_init(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* Except 'cb', the fence context should be completely initialized
|
||||
@@ -256,22 +310,22 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
*/
|
||||
nv_fence_context->cb =
|
||||
nv_prime_fence_context->cb =
|
||||
nvKms->allocateChannelEvent(nv_dev->pDevice,
|
||||
nv_drm_gem_prime_fence_event,
|
||||
nv_fence_context,
|
||||
nv_prime_fence_context,
|
||||
p->event_nvkms_params_ptr,
|
||||
p->event_nvkms_params_size);
|
||||
if (!nv_fence_context->cb) {
|
||||
if (!nv_prime_fence_context->cb) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev,
|
||||
"Failed to allocate fence signaling event");
|
||||
goto failed_to_allocate_channel_event;
|
||||
}
|
||||
|
||||
return nv_fence_context;
|
||||
return nv_prime_fence_context;
|
||||
|
||||
failed_to_allocate_channel_event:
|
||||
nv_drm_free(nv_fence_context);
|
||||
nv_drm_free(nv_prime_fence_context);
|
||||
|
||||
failed_alloc_fence_context:
|
||||
|
||||
@@ -287,38 +341,8 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __nv_drm_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context,
|
||||
unsigned int seqno)
|
||||
{
|
||||
struct nv_drm_prime_fence *nv_fence;
|
||||
@@ -329,14 +353,14 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* If seqno wrapped, force signal fences to make sure none of them
|
||||
* get stuck.
|
||||
*/
|
||||
if (seqno < nv_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
if (seqno < nv_prime_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence->list_entry);
|
||||
@@ -344,14 +368,17 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
spin_lock_init(&nv_fence->lock);
|
||||
|
||||
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
|
||||
&nv_fence->lock, nv_fence_context->context,
|
||||
&nv_fence->lock, nv_prime_fence_context->base.context,
|
||||
seqno);
|
||||
|
||||
list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
|
||||
/* The context maintains a reference to any pending fences. */
|
||||
nv_dma_fence_get(&nv_fence->base);
|
||||
|
||||
nv_fence_context->last_seqno = seqno;
|
||||
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
nv_prime_fence_context->last_seqno = seqno;
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
out:
|
||||
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
|
||||
@@ -385,12 +412,15 @@ static inline struct nv_drm_gem_fence_context *to_gem_fence_context(
|
||||
* because tear down sequence calls to flush all existing
|
||||
* worker thread.
|
||||
*/
|
||||
static void __nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
static void
|
||||
__nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context =
|
||||
to_gem_fence_context(nv_gem);
|
||||
struct nv_drm_fence_context *nv_fence_context =
|
||||
nv_gem_fence_context->nv_fence_context;
|
||||
|
||||
__nv_drm_fence_context_destroy(nv_gem_fence_context->nv_fence_context);
|
||||
nv_fence_context->ops->destroy(nv_fence_context);
|
||||
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
}
|
||||
@@ -400,7 +430,8 @@ const struct nv_drm_gem_object_funcs nv_gem_fence_context_ops = {
|
||||
};
|
||||
|
||||
static inline
|
||||
struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
struct nv_drm_gem_fence_context *
|
||||
__nv_drm_gem_object_fence_context_lookup(
|
||||
struct drm_device *dev,
|
||||
struct drm_file *filp,
|
||||
u32 handle)
|
||||
@@ -416,11 +447,13 @@ struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
return to_gem_fence_context(nv_gem);
|
||||
}
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
static int
|
||||
__nv_drm_gem_fence_context_create(struct drm_device *dev,
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
u32 *handle,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_fence_context_create_params *p = data;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context = NULL;
|
||||
|
||||
if ((nv_gem_fence_context = nv_drm_calloc(
|
||||
@@ -429,10 +462,7 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((nv_gem_fence_context->nv_fence_context =
|
||||
__nv_drm_fence_context_new(nv_dev, p)) == NULL) {
|
||||
goto fence_context_new_failed;
|
||||
}
|
||||
nv_gem_fence_context->nv_fence_context = nv_fence_context;
|
||||
|
||||
nv_drm_gem_object_init(nv_dev,
|
||||
&nv_gem_fence_context->base,
|
||||
@@ -442,21 +472,45 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
|
||||
return nv_drm_gem_handle_create_drop_reference(filep,
|
||||
&nv_gem_fence_context->base,
|
||||
&p->handle);
|
||||
|
||||
fence_context_new_failed:
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
handle);
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_prime_fence_context_create_params *p = data;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
__nv_drm_prime_fence_context_new(nv_dev, p);
|
||||
int err;
|
||||
|
||||
if (!nv_prime_fence_context) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = __nv_drm_gem_fence_context_create(dev,
|
||||
&nv_prime_fence_context->base,
|
||||
&p->handle,
|
||||
filep);
|
||||
if (err) {
|
||||
__nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_gem_fence_attach_params *p = data;
|
||||
struct drm_nvidia_gem_prime_fence_attach_params *p = data;
|
||||
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context;
|
||||
@@ -487,9 +541,22 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
goto fence_context_lookup_failed;
|
||||
}
|
||||
|
||||
if (IS_ERR(fence = __nv_drm_fence_context_create_fence(
|
||||
nv_gem_fence_context->nv_fence_context,
|
||||
p->sem_thresh))) {
|
||||
if (nv_gem_fence_context->nv_fence_context->ops !=
|
||||
&nv_drm_prime_fence_context_ops) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Wrong fence context type: 0x%08x",
|
||||
p->fence_context_handle);
|
||||
|
||||
goto fence_context_create_fence_failed;
|
||||
}
|
||||
|
||||
fence = __nv_drm_prime_fence_context_create_fence(
|
||||
to_prime_fence_context(nv_gem_fence_context->nv_fence_context),
|
||||
p->sem_thresh);
|
||||
|
||||
if (IS_ERR(fence)) {
|
||||
ret = PTR_ERR(fence);
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
@@ -512,6 +579,9 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
|
||||
nv_dma_resv_unlock(&nv_gem->resv);
|
||||
|
||||
/* dma_resv_add_excl_fence takes its own reference to the fence. */
|
||||
nv_dma_fence_put(fence);
|
||||
|
||||
fence_context_create_fence_failed:
|
||||
nv_drm_gem_object_unreference_unlocked(&nv_gem_fence_context->base);
|
||||
|
||||
@@ -35,11 +35,11 @@ struct drm_device;
|
||||
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
#endif /* NV_DRM_FENCE_AVAILABLE */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
|
||||
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
|
||||
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
|
||||
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
|
||||
#if defined(DRM_FORMAT_ABGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
|
||||
#endif
|
||||
#if defined(DRM_FORMAT_XBGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
|
||||
#endif
|
||||
|
||||
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
|
||||
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,
|
||||
|
||||
@@ -95,7 +95,7 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
|
||||
pfn >>= PAGE_SHIFT;
|
||||
pfn += page_offset;
|
||||
} else {
|
||||
BUG_ON(page_offset > nv_nvkms_memory->pages_count);
|
||||
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
|
||||
}
|
||||
|
||||
@@ -201,7 +201,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
sg_table = nv_drm_prime_pages_to_sg(nv_dev->dev,
|
||||
@@ -285,11 +285,13 @@ int nv_drm_dumb_create(
|
||||
if (nv_dev->hasVideoMemory) {
|
||||
pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
|
||||
NvKmsSurfaceMemoryLayoutPitch,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
|
||||
args->size,
|
||||
&compressible);
|
||||
} else {
|
||||
pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
|
||||
NvKmsSurfaceMemoryLayoutPitch,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
|
||||
args->size,
|
||||
&compressible);
|
||||
}
|
||||
@@ -441,6 +443,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory = NULL;
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
enum NvKmsSurfaceMemoryLayout layout;
|
||||
enum NvKmsKapiAllocationType type;
|
||||
int ret = 0;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
@@ -449,6 +452,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
}
|
||||
|
||||
if (p->__pad != 0) {
|
||||
ret = -EINVAL;
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "non-zero value in padding field");
|
||||
goto failed;
|
||||
}
|
||||
@@ -461,15 +465,19 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
|
||||
layout = p->block_linear ?
|
||||
NvKmsSurfaceMemoryLayoutBlockLinear : NvKmsSurfaceMemoryLayoutPitch;
|
||||
type = (p->flags & NV_GEM_ALLOC_NO_SCANOUT) ?
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN : NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT;
|
||||
|
||||
if (nv_dev->hasVideoMemory) {
|
||||
pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
|
||||
layout,
|
||||
type,
|
||||
p->memory_size,
|
||||
&p->compressible);
|
||||
} else {
|
||||
pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
|
||||
layout,
|
||||
type,
|
||||
p->memory_size,
|
||||
&p->compressible);
|
||||
}
|
||||
|
||||
@@ -92,9 +92,9 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vma->vm_flags &= ~VM_PFNMAP;
|
||||
vma->vm_flags &= ~VM_IO;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
nv_vm_flags_clear(vma, VM_PFNMAP);
|
||||
nv_vm_flags_clear(vma, VM_IO);
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -112,8 +112,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset > nv_user_memory->pages_count);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@@ -299,7 +299,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
vma->vm_flags &= ~VM_MAYWRITE;
|
||||
nv_vm_flags_clear(vma, VM_MAYWRITE);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
*/
|
||||
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
|
||||
#include "nvmisc.h"
|
||||
|
||||
@@ -148,6 +150,18 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
goto free;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
nv_drm_for_each_plane(plane, dev) {
|
||||
plane_state = drm_atomic_get_plane_state(state, plane);
|
||||
if (IS_ERR(plane_state)) {
|
||||
ret = PTR_ERR(plane_state);
|
||||
goto free;
|
||||
}
|
||||
|
||||
plane_state->rotation = DRM_MODE_ROTATE_0;
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
|
||||
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
|
||||
if (ret < 0)
|
||||
|
||||
@@ -35,6 +35,35 @@
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
/*
|
||||
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
|
||||
* (2017-09-26) and drm_dev_unref() is removed by
|
||||
@@ -277,11 +306,33 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
for_each_plane_in_state(__state, plane, plane_state, __i)
|
||||
#endif
|
||||
|
||||
static inline struct drm_crtc *nv_drm_crtc_find(struct drm_device *dev,
|
||||
uint32_t id)
|
||||
static inline struct drm_connector *
|
||||
nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
|
||||
uint32_t id)
|
||||
{
|
||||
#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
return drm_connector_find(dev, id);
|
||||
#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_connector_lookup(dev, filep, id);
|
||||
#else
|
||||
return drm_connector_lookup(dev, id);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_connector_put(struct drm_connector *connector)
|
||||
{
|
||||
#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
|
||||
drm_connector_put(connector);
|
||||
#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
drm_connector_unreference(connector);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct drm_crtc *
|
||||
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
|
||||
{
|
||||
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_crtc_find(dev, NULL /* file_priv */, id);
|
||||
return drm_crtc_find(dev, filep, id);
|
||||
#else
|
||||
return drm_crtc_find(dev, id);
|
||||
#endif
|
||||
@@ -297,6 +348,30 @@ static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
|
||||
#include <drm/drm_auth.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
|
||||
#include <drm/drm_file.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
|
||||
* pointers in drm_lease.c") in v5.15 (2021-07-20)
|
||||
*/
|
||||
static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
|
||||
{
|
||||
#if defined(NV_DRM_FILE_GET_MASTER_PRESENT)
|
||||
return drm_file_get_master(filep);
|
||||
#else
|
||||
if (filep->master) {
|
||||
return drm_master_get(filep->master);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* drm_connector_for_each_possible_encoder() is added by commit
|
||||
* 83aefbb887b59df0b3520965c3701e01deacfc52 which was Signed-off-by:
|
||||
|
||||
@@ -34,8 +34,8 @@
|
||||
#define DRM_NVIDIA_GEM_IMPORT_USERSPACE_MEMORY 0x02
|
||||
#define DRM_NVIDIA_GET_DEV_INFO 0x03
|
||||
#define DRM_NVIDIA_FENCE_SUPPORTED 0x04
|
||||
#define DRM_NVIDIA_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_GET_CLIENT_CAPABILITY 0x08
|
||||
#define DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY 0x09
|
||||
#define DRM_NVIDIA_GEM_MAP_OFFSET 0x0a
|
||||
@@ -43,6 +43,11 @@
|
||||
#define DRM_NVIDIA_GET_CRTC_CRC32_V2 0x0c
|
||||
#define DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY 0x0d
|
||||
#define DRM_NVIDIA_GEM_IDENTIFY_OBJECT 0x0e
|
||||
#define DRM_NVIDIA_DMABUF_SUPPORTED 0x0f
|
||||
#define DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID 0x10
|
||||
#define DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID 0x11
|
||||
#define DRM_NVIDIA_GRANT_PERMISSIONS 0x12
|
||||
#define DRM_NVIDIA_REVOKE_PERMISSIONS 0x13
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
|
||||
@@ -65,50 +70,69 @@
|
||||
#if defined(NV_LINUX)
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_DMABUF_SUPPORTED)
|
||||
#else
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED 0
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_CONTEXT_CREATE), \
|
||||
struct drm_nvidia_fence_context_create_params)
|
||||
#define DRM_IOCTL_NVIDIA_PRIME_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE),\
|
||||
struct drm_nvidia_prime_fence_context_create_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_fence_attach_params)
|
||||
#define DRM_IOCTL_NVIDIA_GEM_PRIME_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_prime_fence_attach_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
struct drm_nvidia_get_client_capability_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
|
||||
struct drm_nvidia_get_crtc_crc32_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
|
||||
struct drm_nvidia_get_crtc_crc32_v2_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
|
||||
struct drm_nvidia_gem_export_nvkms_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
|
||||
struct drm_nvidia_gem_map_offset_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
|
||||
struct drm_nvidia_gem_alloc_nvkms_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
|
||||
struct drm_nvidia_gem_export_dmabuf_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
struct drm_nvidia_gem_identify_object_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID),\
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID),\
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GRANT_PERMISSIONS), \
|
||||
struct drm_nvidia_grant_permissions_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_REVOKE_PERMISSIONS), \
|
||||
struct drm_nvidia_revoke_permissions_params)
|
||||
|
||||
struct drm_nvidia_gem_import_nvkms_memory_params {
|
||||
uint64_t mem_size; /* IN */
|
||||
|
||||
@@ -136,7 +160,7 @@ struct drm_nvidia_get_dev_info_params {
|
||||
uint32_t sector_layout; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_fence_context_create_params {
|
||||
struct drm_nvidia_prime_fence_context_create_params {
|
||||
uint32_t handle; /* OUT GEM handle to fence context */
|
||||
|
||||
uint32_t index; /* IN Index of semaphore to use for fencing */
|
||||
@@ -151,7 +175,7 @@ struct drm_nvidia_fence_context_create_params {
|
||||
uint64_t event_nvkms_params_size; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_gem_fence_attach_params {
|
||||
struct drm_nvidia_gem_prime_fence_attach_params {
|
||||
uint32_t handle; /* IN GEM handle to attach fence to */
|
||||
uint32_t fence_context_handle; /* IN GEM handle to fence context on which fence is run on */
|
||||
uint32_t sem_thresh; /* IN Semaphore value to reach before signal */
|
||||
@@ -199,6 +223,8 @@ struct drm_nvidia_gem_map_offset_params {
|
||||
uint64_t offset; /* OUT Fake offset */
|
||||
};
|
||||
|
||||
#define NV_GEM_ALLOC_NO_SCANOUT (1 << 0)
|
||||
|
||||
struct drm_nvidia_gem_alloc_nvkms_memory_params {
|
||||
uint32_t handle; /* OUT */
|
||||
uint8_t block_linear; /* IN */
|
||||
@@ -206,6 +232,7 @@ struct drm_nvidia_gem_alloc_nvkms_memory_params {
|
||||
uint16_t __pad;
|
||||
|
||||
uint64_t memory_size; /* IN */
|
||||
uint32_t flags; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_gem_export_dmabuf_memory_params {
|
||||
@@ -229,4 +256,23 @@ struct drm_nvidia_gem_identify_object_params {
|
||||
drm_nvidia_gem_object_type object_type; /* OUT GEM object type */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params {
|
||||
uint32_t connectorId; /* IN */
|
||||
uint32_t dpyId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
uint32_t connectorId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_grant_permissions_params {
|
||||
int32_t fd; /* IN */
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_revoke_permissions_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
|
||||
|
||||
@@ -47,6 +47,14 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
void *nv_drm_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
size_t total_size = nmemb * size;
|
||||
//
|
||||
// Check for overflow.
|
||||
//
|
||||
if ((nmemb != 0) && ((total_size / nmemb) != size))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return kzalloc(nmemb * size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
@@ -93,8 +101,6 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
const int write = 1;
|
||||
const int force = 0;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
@@ -105,7 +111,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_GET_USER_PAGES(address, pages_count, write, force,
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
@@ -123,7 +129,7 @@ failed:
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
put_page(user_pages[i]);
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,8 +144,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
|
||||
for (i = 0; i < pages_count; i++) {
|
||||
set_page_dirty_lock(pages[i]);
|
||||
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
|
||||
nv_drm_free(pages);
|
||||
@@ -174,16 +179,7 @@ static void __exit nv_linux_drm_exit(void)
|
||||
module_init(nv_linux_drm_init);
|
||||
module_exit(nv_linux_drm_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
|
||||
to_nv_crtc_state(new_crtc_state);
|
||||
struct drm_plane_state *old_plane_state = NULL;
|
||||
struct drm_plane *plane = NULL;
|
||||
struct drm_plane *primary_plane = crtc->primary;
|
||||
bool primary_event = false;
|
||||
bool overlay_event = false;
|
||||
int i;
|
||||
|
||||
if (!old_crtc_state->active && !new_crtc_state->active) {
|
||||
@@ -134,16 +131,19 @@ static int __nv_drm_put_back_post_fence_fd(
|
||||
const struct NvKmsKapiLayerReplyConfig *layer_reply_config)
|
||||
{
|
||||
int fd = layer_reply_config->postSyncptFd;
|
||||
int ret = 0;
|
||||
|
||||
if ((fd >= 0) && (plane_state->fd_user_ptr != NULL)) {
|
||||
if (put_user(fd, plane_state->fd_user_ptr)) {
|
||||
return -EFAULT;
|
||||
ret = copy_to_user(plane_state->fd_user_ptr, &fd, sizeof(fd));
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*! set back to Null and let set_property specify it again */
|
||||
plane_state->fd_user_ptr = NULL;
|
||||
}
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __nv_drm_get_syncpt_data(
|
||||
@@ -274,6 +274,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
|
||||
nv_new_crtc_state->nv_flip = NULL;
|
||||
}
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
|
||||
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -122,6 +122,11 @@ struct nv_drm_device {
|
||||
NvBool supportsSyncpts;
|
||||
|
||||
struct drm_property *nv_out_fence_property;
|
||||
struct drm_property *nv_input_colorspace_property;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
@@ -16,7 +16,7 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-prime-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
|
||||
@@ -59,11 +59,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
@@ -100,6 +103,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
@@ -115,6 +119,14 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
||||
@@ -34,6 +34,9 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <acpi/video.h>
|
||||
|
||||
#include "nvstatus.h"
|
||||
|
||||
@@ -59,7 +62,7 @@
|
||||
|
||||
#define NVKMS_LOG_PREFIX "nvidia-modeset: "
|
||||
|
||||
static bool output_rounding_fix = false;
|
||||
static bool output_rounding_fix = true;
|
||||
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
|
||||
|
||||
/* These parameters are used for fault injection tests. Normally the defaults
|
||||
@@ -72,8 +75,26 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
|
||||
static bool malloc_verbose = false;
|
||||
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
|
||||
|
||||
/* This parameter is used to find the dpy override conf file */
|
||||
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
|
||||
|
||||
MODULE_PARM_DESC(config_file,
|
||||
"Path to the nvidia-modeset configuration file "
|
||||
"(default: disabled)");
|
||||
static char *nvkms_conf = NULL;
|
||||
module_param_named(config_file, nvkms_conf, charp, 0400);
|
||||
|
||||
static atomic_t nvkms_alloc_called_count;
|
||||
|
||||
static bool force_api_to_hw_head_identity_mapping = false;
|
||||
module_param_named(force_api_to_hw_head_identity_mapping,
|
||||
force_api_to_hw_head_identity_mapping, bool, 0400);
|
||||
|
||||
NvBool nvkms_force_api_to_hw_head_identity_mappings(void)
|
||||
{
|
||||
return force_api_to_hw_head_identity_mapping;
|
||||
}
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void)
|
||||
{
|
||||
return output_rounding_fix;
|
||||
@@ -85,110 +106,6 @@ NvBool nvkms_output_rounding_fix(void)
|
||||
* NVKMS interface for nvhost unit for sync point APIs.
|
||||
*************************************************************************/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef NVKMS_SYNCPT_STUBS_NEEDED
|
||||
/* Unsupported STUB for nvkms_syncpt APIs */
|
||||
NvBool nvkms_syncpt_op(
|
||||
@@ -284,7 +201,10 @@ static inline int nvkms_read_trylock_pm_lock(void)
|
||||
|
||||
static inline void nvkms_read_lock_pm_lock(void)
|
||||
{
|
||||
down_read(&nvkms_pm_lock);
|
||||
while (!down_read_trylock(&nvkms_pm_lock)) {
|
||||
try_to_freeze();
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nvkms_read_unlock_pm_lock(void)
|
||||
@@ -1060,6 +980,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
struct nvkms_backlight_device *nvkms_bd = NULL;
|
||||
int i;
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
|
||||
if (gpu_info == NULL) {
|
||||
return NULL;
|
||||
@@ -1182,7 +1108,7 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvkms_close_common(struct nvkms_per_open *popen)
|
||||
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
{
|
||||
/*
|
||||
* Don't use down_interruptible(): we need to free resources
|
||||
@@ -1220,13 +1146,13 @@ void nvkms_close_common(struct nvkms_per_open *popen)
|
||||
nvkms_free(popen, sizeof(*popen));
|
||||
}
|
||||
|
||||
static void nvkms_close_deferred(void *data)
|
||||
static void nvkms_close_pm_unlocked(void *data)
|
||||
{
|
||||
struct nvkms_per_open *popen = data;
|
||||
|
||||
nvkms_read_lock_pm_lock();
|
||||
|
||||
nvkms_close_common(popen);
|
||||
nvkms_close_pm_locked(popen);
|
||||
|
||||
nvkms_read_unlock_pm_lock();
|
||||
}
|
||||
@@ -1234,11 +1160,11 @@ static void nvkms_close_deferred(void *data)
|
||||
static void nvkms_close_popen(struct nvkms_per_open *popen)
|
||||
{
|
||||
if (nvkms_read_trylock_pm_lock() == 0) {
|
||||
nvkms_close_common(popen);
|
||||
nvkms_close_pm_locked(popen);
|
||||
nvkms_read_unlock_pm_lock();
|
||||
} else {
|
||||
nv_kthread_q_item_init(&popen->deferred_close_q_item,
|
||||
nvkms_close_deferred,
|
||||
nvkms_close_pm_unlocked,
|
||||
popen);
|
||||
nvkms_queue_work(&nvkms_deferred_close_kthread_q,
|
||||
&popen->deferred_close_q_item);
|
||||
@@ -1291,7 +1217,7 @@ struct nvkms_per_open* nvkms_open_from_kapi
|
||||
|
||||
void nvkms_close_from_kapi(struct nvkms_per_open *popen)
|
||||
{
|
||||
nvkms_close_popen(popen);
|
||||
nvkms_close_pm_unlocked(popen);
|
||||
}
|
||||
|
||||
NvBool nvkms_ioctl_from_kapi
|
||||
@@ -1450,30 +1376,119 @@ static void nvkms_proc_exit(void)
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
proc_remove(nvkms_proc_dir);
|
||||
#else
|
||||
/*
|
||||
* On kernel versions without proc_remove(), we need to explicitly
|
||||
* remove each proc file beneath nvkms_proc_dir.
|
||||
* nvkms_proc_init() only creates files directly under
|
||||
* nvkms_proc_dir, so those are the only files we need to remove
|
||||
* here: warn if there is any deeper directory nesting.
|
||||
*/
|
||||
{
|
||||
struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
}
|
||||
|
||||
while (entry != NULL) {
|
||||
struct proc_dir_entry *next = entry->next;
|
||||
WARN_ON(entry->subdir != NULL);
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
entry = next;
|
||||
}
|
||||
/*************************************************************************
|
||||
* NVKMS Config File Read
|
||||
************************************************************************/
|
||||
static NvBool nvkms_fs_mounted(void)
|
||||
{
|
||||
return current->fs != NULL;
|
||||
}
|
||||
|
||||
static size_t nvkms_config_file_open
|
||||
(
|
||||
char *fname,
|
||||
char ** const buff
|
||||
)
|
||||
{
|
||||
int i = 0;
|
||||
struct file *file;
|
||||
struct inode *file_inode;
|
||||
size_t file_size = 0;
|
||||
size_t read_size = 0;
|
||||
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
|
||||
loff_t pos = 0;
|
||||
#endif
|
||||
|
||||
if (!nvkms_fs_mounted()) {
|
||||
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
|
||||
#endif /* NV_PROC_REMOVE_PRESENT */
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
file = filp_open(fname, O_RDONLY, 0);
|
||||
if (file == NULL || IS_ERR(file)) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to open %s\n",
|
||||
fname);
|
||||
return 0;
|
||||
}
|
||||
|
||||
file_inode = file->f_inode;
|
||||
if (file_inode == NULL || IS_ERR(file_inode)) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Inode is invalid\n");
|
||||
goto done;
|
||||
}
|
||||
file_size = file_inode->i_size;
|
||||
if (file_size > NVKMS_READ_FILE_MAX_SIZE) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: File exceeds maximum size\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
*buff = nvkms_alloc(file_size, NV_FALSE);
|
||||
if (*buff == NULL) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Once we have access to GPL symbols, this can be replaced with
|
||||
* kernel_read_file for kernels >= 4.6
|
||||
*/
|
||||
while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
|
||||
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
|
||||
ssize_t ret = kernel_read(file, *buff + read_size,
|
||||
file_size - read_size, &pos);
|
||||
#else
|
||||
ssize_t ret = kernel_read(file, read_size,
|
||||
*buff + read_size,
|
||||
file_size - read_size);
|
||||
#endif
|
||||
if (ret <= 0) {
|
||||
break;
|
||||
}
|
||||
read_size += ret;
|
||||
}
|
||||
|
||||
if (read_size != file_size) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to read %s\n",
|
||||
fname);
|
||||
goto done;
|
||||
}
|
||||
|
||||
filp_close(file, current->files);
|
||||
return file_size;
|
||||
|
||||
done:
|
||||
nvkms_free(*buff, file_size);
|
||||
filp_close(file, current->files);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* must be called with nvkms_lock locked */
|
||||
static void nvkms_read_config_file_locked(void)
|
||||
{
|
||||
char *buffer = NULL;
|
||||
size_t buf_size = 0;
|
||||
|
||||
/* only read the config file if the kernel parameter is set */
|
||||
if (!NVKMS_CONF_FILE_SPECIFIED) {
|
||||
return;
|
||||
}
|
||||
|
||||
buf_size = nvkms_config_file_open(nvkms_conf, &buffer);
|
||||
|
||||
if (buf_size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvKmsReadConf(buffer, buf_size, nvkms_config_file_open)) {
|
||||
printk(KERN_INFO NVKMS_LOG_PREFIX "Successfully read %s\n",
|
||||
nvkms_conf);
|
||||
}
|
||||
|
||||
nvkms_free(buffer, buf_size);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
@@ -1647,10 +1662,12 @@ static int __init nvkms_init(void)
|
||||
if (!nvKmsModuleLoad()) {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
up(&nvkms_lock);
|
||||
if (ret != 0) {
|
||||
up(&nvkms_lock);
|
||||
goto fail_module_load;
|
||||
}
|
||||
nvkms_read_config_file_locked();
|
||||
up(&nvkms_lock);
|
||||
|
||||
nvkms_proc_init();
|
||||
|
||||
@@ -1734,16 +1751,7 @@ restart:
|
||||
module_init(nvkms_init);
|
||||
module_exit(nvkms_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -43,14 +43,9 @@ enum NvKmsSyncPtOp {
|
||||
NVKMS_SYNCPT_OP_ALLOC,
|
||||
NVKMS_SYNCPT_OP_GET,
|
||||
NVKMS_SYNCPT_OP_PUT,
|
||||
NVKMS_SYNCPT_OP_INCR_MAX,
|
||||
NVKMS_SYNCPT_OP_CPU_INCR,
|
||||
NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
|
||||
NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
|
||||
NVKMS_SYNCPT_OP_READ_MINVAL,
|
||||
NVKMS_SYNCPT_OP_READ_MAXVAL,
|
||||
NVKMS_SYNCPT_OP_SET_MIN_EQ_MAX,
|
||||
NVKMS_SYNCPT_OP_SET_MAXVAL,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -60,24 +55,10 @@ typedef struct {
|
||||
NvU32 id; /* out */
|
||||
} alloc;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} get;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} put;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 incr; /* in */
|
||||
NvU32 value; /* out */
|
||||
} incr_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} cpu_incr;
|
||||
|
||||
struct {
|
||||
NvS32 fd; /* in */
|
||||
NvU32 id; /* out */
|
||||
@@ -94,22 +75,9 @@ typedef struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 minval; /* out */
|
||||
} read_minval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 maxval; /* out */
|
||||
} read_maxval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} set_min_eq_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 val; /* in */
|
||||
} set_maxval;
|
||||
} NvKmsSyncPtOpParams;
|
||||
|
||||
NvBool nvkms_force_api_to_hw_head_identity_mappings(void);
|
||||
NvBool nvkms_output_rounding_fix(void);
|
||||
|
||||
void nvkms_call_rm (void *ops);
|
||||
|
||||
@@ -85,15 +85,11 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
|
||||
@@ -42,6 +42,20 @@ typedef void nvkms_procfs_proc_t(void *data,
|
||||
char *buffer, size_t size,
|
||||
nvkms_procfs_out_string_func_t *outString);
|
||||
|
||||
/* max number of loops to prevent hanging the kernel if an edge case is hit */
|
||||
#define NVKMS_READ_FILE_MAX_LOOPS 1000
|
||||
/* max size for any file read by the config system */
|
||||
#define NVKMS_READ_FILE_MAX_SIZE 8192
|
||||
|
||||
/*
|
||||
* The read file callback should allocate a buffer pointed to by *buff, fill it
|
||||
* with the contents of fname, and return the size of the buffer. Buffer is not
|
||||
* guaranteed to be null-terminated. The caller is responsible for freeing the
|
||||
* buffer with nvkms_free, not nvFree.
|
||||
*/
|
||||
typedef size_t nvkms_config_read_file_func_t(char *fname,
|
||||
char ** const buff);
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
nvkms_procfs_proc_t *func;
|
||||
@@ -74,6 +88,9 @@ void nvKmsResume(NvU32 gpuId);
|
||||
|
||||
void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles);
|
||||
|
||||
NvBool nvKmsReadConf(const char *buff, size_t size,
|
||||
nvkms_config_read_file_func_t readfile);
|
||||
|
||||
void nvKmsKapiHandleEventQueueChange
|
||||
(
|
||||
struct NvKmsKapiDevice *device
|
||||
|
||||
@@ -177,11 +177,7 @@ struct nvidia_p2p_page_table {
|
||||
* This means the pages underlying the range of GPU virtual memory
|
||||
* will persist until explicitly freed by nvidia_p2p_put_pages().
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
|
||||
|
||||
|
||||
* MIG-enabled devices and vGPU.
|
||||
|
||||
* @param[in] data
|
||||
* A non-NULL opaque pointer to private data to be passed to the
|
||||
* callback function.
|
||||
|
||||
@@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
|
||||
# MOFED's Module.symvers is needed for the build
|
||||
# to find the additional ib_* symbols.
|
||||
#
|
||||
# Also, MOFED doesn't use kbuild ARCH names.
|
||||
# So adapt OFA_ARCH to match MOFED's conventions.
|
||||
#
|
||||
ifeq ($(ARCH), arm64)
|
||||
OFA_ARCH := aarch64
|
||||
else ifeq ($(ARCH), powerpc)
|
||||
OFA_ARCH := ppc64le
|
||||
else
|
||||
OFA_ARCH := $(ARCH)
|
||||
endif
|
||||
OFA_DIR := /usr/src/ofa_kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
|
||||
if [ -d "$$d" ]; then \
|
||||
echo "$$d"; \
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -116,6 +116,14 @@ extern "C" {
|
||||
#define NVA16F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVA16F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVA16F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVA16F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVA16F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVA16F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVA16F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVA16F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVA16F_DMA_METHOD_ADDRESS 11:0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -69,6 +69,9 @@ extern "C" {
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
|
||||
375
kernel-open/nvidia-uvm/clc86f.h
Normal file
375
kernel-open/nvidia-uvm/clc86f.h
Normal file
@@ -0,0 +1,375 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef _clc86f_h_
|
||||
#define _clc86f_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class HOPPER_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
* Note there is no .mfs file for this class.
|
||||
*/
|
||||
#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)
|
||||
|
||||
#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct Nvc86fControl_struct {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} Nvc86fControl, HopperAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVC86F_SET_OBJECT (0x00000000)
|
||||
#define NVC86F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVC86F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVC86F_ILLEGAL (0x00000004)
|
||||
#define NVC86F_ILLEGAL_HANDLE 31:0
|
||||
#define NVC86F_NOP (0x00000008)
|
||||
#define NVC86F_NOP_HANDLE 31:0
|
||||
#define NVC86F_SEMAPHOREA (0x00000010)
|
||||
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
|
||||
#define NVC86F_SEMAPHOREB (0x00000014)
|
||||
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
|
||||
#define NVC86F_SEMAPHOREC (0x00000018)
|
||||
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
|
||||
#define NVC86F_SEMAPHORED (0x0000001C)
|
||||
#define NVC86F_SEMAPHORED_OPERATION 4:0
|
||||
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
|
||||
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
|
||||
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
|
||||
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
|
||||
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
|
||||
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
|
||||
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
|
||||
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
|
||||
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
|
||||
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
|
||||
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
|
||||
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
|
||||
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
|
||||
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
|
||||
#define NVC86F_SEMAPHORED_REDUCTION 30:27
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
|
||||
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
|
||||
#define NVC86F_SEMAPHORED_FORMAT 31:31
|
||||
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
|
||||
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
|
||||
#define NVC86F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
|
||||
// specifying the page address for a targeted TLB invalidate and the uTLB for
|
||||
// a targeted REPLAY_CANCEL for UVM.
|
||||
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
|
||||
// rearranged fields.
|
||||
#define NVC86F_MEM_OP_A (0x00000028)
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
|
||||
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
|
||||
#define NVC86F_MEM_OP_B (0x0000002c)
|
||||
#define NVC86F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
|
||||
#define NVC86F_MEM_OP_C (0x00000030)
|
||||
#define NVC86F_MEM_OP_C_MEMBAR_TYPE 2:0
|
||||
#define NVC86F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
|
||||
#define NVC86F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
|
||||
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
|
||||
#define NVC86F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
|
||||
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
|
||||
#define NVC86F_MEM_OP_D (0x00000034)
|
||||
#define NVC86F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
|
||||
#define NVC86F_MEM_OP_D_OPERATION 31:27
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
|
||||
#define NVC86F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
|
||||
// CLEAN_LINES is an alias for Tegra/GPU IP usage
|
||||
#define NVC86F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVC86F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
|
||||
#define NVC86F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
|
||||
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
|
||||
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
|
||||
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
|
||||
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
|
||||
#define NVC86F_SET_REFERENCE (0x00000050)
|
||||
#define NVC86F_SET_REFERENCE_COUNT 31:0
|
||||
#define NVC86F_SEM_ADDR_LO (0x0000005c)
|
||||
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
|
||||
#define NVC86F_SEM_ADDR_HI (0x00000060)
|
||||
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
|
||||
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
|
||||
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
|
||||
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
|
||||
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
|
||||
#define NVC86F_SEM_EXECUTE (0x0000006c)
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
|
||||
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
|
||||
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
|
||||
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
|
||||
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
|
||||
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC86F_WFI (0x00000078)
|
||||
#define NVC86F_WFI_SCOPE 0:0
|
||||
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
|
||||
#define NVC86F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVC86F_YIELD (0x00000080)
|
||||
#define NVC86F_YIELD_OP 1:0
|
||||
#define NVC86F_YIELD_OP_NOP 0x00000000
|
||||
#define NVC86F_YIELD_OP_TSG 0x00000003
|
||||
#define NVC86F_CLEAR_FAULTED (0x00000084)
|
||||
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
|
||||
// are intentionally not exposed to the driver through these defines.
|
||||
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
|
||||
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
|
||||
#define NVC86F_QUADRO_VERIFY (0x000000a0)
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVC86F_GP_ENTRY__SIZE 8
|
||||
#define NVC86F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVC86F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVC86F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVC86F_GP_ENTRY0_GET 31:2
|
||||
#define NVC86F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC86F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND 24:8
|
||||
#define NVC86F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC86F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC86F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC86F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC86F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVC86F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVC86F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVC86F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVC86F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
|
||||
|
||||
/* dma method formats */
|
||||
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVC86F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_TERT_OP 17:16
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVC86F_DMA_METHOD_COUNT 28:16
|
||||
#define NVC86F_DMA_IMMD_DATA 28:16
|
||||
#define NVC86F_DMA_SEC_OP 31:29
|
||||
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVC86F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_INCR_COUNT 28:16
|
||||
#define NVC86F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC86F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
|
||||
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC86F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC86F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
|
||||
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC86F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVC86F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVC86F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVC86F_DMA_IMMD_DATA 28:16
|
||||
#define NVC86F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVC86F_DMA_ADDRESS 12:2
|
||||
#define NVC86F_DMA_SUBCH 15:13
|
||||
#define NVC86F_DMA_OPCODE3 17:16
|
||||
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVC86F_DMA_COUNT 28:18
|
||||
#define NVC86F_DMA_OPCODE 31:29
|
||||
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVC86F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clc86f_h_ */
|
||||
430
kernel-open/nvidia-uvm/clc8b5.h
Normal file
430
kernel-open/nvidia-uvm/clc8b5.h
Normal file
@@ -0,0 +1,430 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 1993-2004 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clc8b5_h_
|
||||
#define _clc8b5_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HOPPER_DMA_COPY_A (0x0000C8B5)
|
||||
|
||||
typedef volatile struct _clc8b5_tag0 {
|
||||
NvV32 Reserved00[0x40];
|
||||
NvV32 Nop; // 0x00000100 - 0x00000103
|
||||
NvV32 Reserved01[0xF];
|
||||
NvV32 PmTrigger; // 0x00000140 - 0x00000143
|
||||
NvV32 Reserved02[0x36];
|
||||
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
|
||||
NvV32 Reserved03[0x6];
|
||||
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
|
||||
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
|
||||
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
|
||||
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
|
||||
NvV32 Reserved04[0x1];
|
||||
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
|
||||
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
|
||||
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
|
||||
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
|
||||
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
|
||||
NvV32 Reserved05[0x26];
|
||||
NvV32 LaunchDma; // 0x00000300 - 0x00000303
|
||||
NvV32 Reserved06[0x3F];
|
||||
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
|
||||
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
|
||||
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
|
||||
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
|
||||
NvV32 PitchIn; // 0x00000410 - 0x00000413
|
||||
NvV32 PitchOut; // 0x00000414 - 0x00000417
|
||||
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
|
||||
NvV32 LineCount; // 0x0000041C - 0x0000041F
|
||||
NvV32 Reserved07[0x38];
|
||||
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
|
||||
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
|
||||
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
|
||||
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
|
||||
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
|
||||
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
|
||||
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
|
||||
NvV32 Reserved08[0x5];
|
||||
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
|
||||
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
|
||||
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
|
||||
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
|
||||
NvV32 Reserved09[0x6F];
|
||||
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
|
||||
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
|
||||
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
|
||||
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
|
||||
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
|
||||
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
|
||||
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
|
||||
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
|
||||
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
|
||||
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
|
||||
NvV32 Reserved10[0x1];
|
||||
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
|
||||
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
|
||||
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
|
||||
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
|
||||
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
|
||||
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
|
||||
NvV32 Reserved11[0x1];
|
||||
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
|
||||
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
|
||||
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
|
||||
NvV32 DstOriginY; // 0x00000750 - 0x00000753
|
||||
NvV32 Reserved12[0x270];
|
||||
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
|
||||
NvV32 Reserved13[0x3BA];
|
||||
} hopper_dma_copy_aControlPio;
|
||||
|
||||
#define NVC8B5_NOP (0x00000100)
|
||||
#define NVC8B5_NOP_PARAMETER 31:0
|
||||
#define NVC8B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC8B5_PM_TRIGGER_V 31:0
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
|
||||
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
|
||||
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
|
||||
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
|
||||
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
|
||||
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
|
||||
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
|
||||
#define NVC8B5_LAUNCH_DMA (0x00000300)
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
|
||||
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
|
||||
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
|
||||
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
|
||||
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
|
||||
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
|
||||
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
|
||||
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
|
||||
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
|
||||
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
|
||||
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
|
||||
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
|
||||
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
|
||||
#define NVC8B5_OFFSET_IN_LOWER_VALUE 31:0
|
||||
#define NVC8B5_OFFSET_OUT_UPPER (0x00000408)
|
||||
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
|
||||
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
|
||||
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
|
||||
#define NVC8B5_PITCH_IN (0x00000410)
|
||||
#define NVC8B5_PITCH_IN_VALUE 31:0
|
||||
#define NVC8B5_PITCH_OUT (0x00000414)
|
||||
#define NVC8B5_PITCH_OUT_VALUE 31:0
|
||||
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
|
||||
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
|
||||
#define NVC8B5_LINE_COUNT (0x0000041C)
|
||||
#define NVC8B5_LINE_COUNT_VALUE 31:0
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
|
||||
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
|
||||
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
|
||||
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
|
||||
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
|
||||
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
|
||||
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
|
||||
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
|
||||
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
|
||||
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
|
||||
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
|
||||
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
|
||||
#define NVC8B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
|
||||
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
|
||||
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
|
||||
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
|
||||
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC8B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC8B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC8B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC8B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC8B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC8B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC8B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC8B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC8B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC8B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC8B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC8B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC8B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC8B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clc8b5_h
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2021 NVIDIA Corporation
|
||||
Copyright (c) 2013-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -32,15 +32,8 @@
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100 (0x00000140)
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100 (0x00000160)
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100 (0x00000170)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100 (0x00000180)
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100 (0x00000190)
|
||||
|
||||
/* valid ARCHITECTURE_GP10x implementation values */
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)
|
||||
|
||||
508
kernel-open/nvidia-uvm/hwref/hopper/gh100/dev_fault.h
Normal file
508
kernel-open/nvidia-uvm/hwref/hopper/gh100/dev_fault.h
Normal file
@@ -0,0 +1,508 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2003-2016 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef __gh100_dev_fault_h__
|
||||
#define __gh100_dev_fault_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_PFAULT /* ----G */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 384 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_IFB 55 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_FLA 4 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1 256 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2 320 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_SEC 6 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_FSP 7 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF 10 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF0 10 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF1 11 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF2 12 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF3 13 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF4 14 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF5 15 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF6 16 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF7 17 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF8 18 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC 19 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 19 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 20 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 21 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC3 22 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC4 23 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC5 24 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC6 25 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC7 26 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 27 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG1 28 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG2 29 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG3 30 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG4 31 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG5 32 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG6 33 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG7 34 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 43 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE0 43 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE1 44 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE2 45 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE3 46 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE4 47 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE5 48 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE6 49 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE7 50 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE8 51 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE9 52 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 5 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC0 35 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC1 36 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC2 37 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_OFA0 53 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 56 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST0 64 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST1 65 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST2 66 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST3 67 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST4 68 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST5 69 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST6 70 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST7 71 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST8 72 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST9 73 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST10 74 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST11 75 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST12 76 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST13 77 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST14 78 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST15 79 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST16 80 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST17 81 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST18 82 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST19 83 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST20 84 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST21 85 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST22 86 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST23 87 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST24 88 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST25 89 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST26 90 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST27 91 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST28 92 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST29 93 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST30 94 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST31 95 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST32 96 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST33 97 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST34 98 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST35 99 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST36 100 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST37 101 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST38 102 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST39 103 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST40 104 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST41 105 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST42 106 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST43 107 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST44 108 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 256 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 257 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 258 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 259 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 260 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 261 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 262 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 263 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 264 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 265 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 266 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 267 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 268 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 269 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 270 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 271 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 272 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 273 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 274 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 275 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 276 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 277 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 278 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 279 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 280 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 281 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 282 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 283 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 284 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 285 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 286 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 287 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 288 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 289 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 290 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 291 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 292 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 293 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 294 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 295 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 296 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 297 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 298 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 299 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 300 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 301 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 302 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 303 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 304 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 305 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 306 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 307 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 308 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 309 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 310 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 311 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 312 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 313 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 314 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 315 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 316 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 317 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 318 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 319 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 320 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 321 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 322 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 323 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 324 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 325 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 326 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 327 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 328 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 329 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 330 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 331 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 332 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 333 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 334 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 335 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 336 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 337 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 338 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 339 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 340 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 341 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 342 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 343 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 344 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 345 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 346 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 347 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 348 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 349 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 350 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 351 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 352 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 353 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 354 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 355 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 356 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 357 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 358 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 359 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 360 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 361 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 362 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 363 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 364 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 365 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 366 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 367 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 368 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 369 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 370 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 371 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 372 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 373 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 374 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 375 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 376 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 377 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 378 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 379 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 380 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 381 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 382 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 383 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
|
||||
#define NV_PFAULT_CLIENT 14:8 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_0 0x00000070 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_1 0x00000071 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_2 0x00000072 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_3 0x00000073 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DISPNISO 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE0 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS0 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE3 0x0000000C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ACTRS 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PERF0 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED0 0x0000001D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC3 0x0000003D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC4 0x0000003E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_OFA0 0x0000003F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE10 0x00000040 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE11 0x00000041 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE12 0x00000042 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE13 0x00000043 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE14 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE15 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X9 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X10 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X11 0x00000049 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X12 0x0000004A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X13 0x0000004B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X14 0x0000004C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X15 0x0000004D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE1 0x0000004E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE2 0x0000004F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE3 0x00000050 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE4 0x00000051 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE5 0x00000052 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE6 0x00000053 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE7 0x00000054 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS1 0x00000055 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS2 0x00000056 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS3 0x00000057 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS4 0x00000058 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS5 0x00000059 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS6 0x0000005A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS7 0x0000005B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED1 0x0000005C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED2 0x0000005D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED3 0x0000005E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED4 0x0000005F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED5 0x00000060 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED6 0x00000061 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED7 0x00000062 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC 0x00000063 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC5 0x0000006F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC6 0x00000070 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC7 0x00000071 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG1 0x00000072 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG2 0x00000073 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG3 0x00000074 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG4 0x00000075 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG5 0x00000076 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG6 0x00000077 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG7 0x00000078 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FSP 0x00000079 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
|
||||
#define NV_PFAULT_GPC_ID 28:24 /* */
|
||||
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
|
||||
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
|
||||
#define NV_PFAULT_VALID 31:31 /* */
|
||||
#endif // __gh100_dev_fault_h__
|
||||
560
kernel-open/nvidia-uvm/hwref/hopper/gh100/dev_mmu.h
Normal file
560
kernel-open/nvidia-uvm/hwref/hopper/gh100/dev_mmu.h
Normal file
@@ -0,0 +1,560 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2003-2016 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef __gh100_dev_mmu_h__
|
||||
#define __gh100_dev_mmu_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_MMU_PDE /* ----G */
|
||||
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PDE__SIZE 8
|
||||
#define NV_MMU_PTE /* ----G */
|
||||
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PTE__SIZE 8
|
||||
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
|
||||
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
|
||||
#define NV_MMU_VER1_PDE /* ----G */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PDE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE /* ----G */
|
||||
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PTE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_NEW_PDE /* ----G */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PDE__SIZE 8
|
||||
#define NV_MMU_NEW_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_NEW_PTE /* ----G */
|
||||
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PTE__SIZE 8
|
||||
#define NV_MMU_VER2_PDE /* ----G */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PDE__SIZE 8
|
||||
#define NV_MMU_VER2_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER2_PTE /* ----G */
|
||||
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PTE__SIZE 8
|
||||
#define NV_MMU_VER3_PDE /* ----G */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PDE__SIZE 8
|
||||
#define NV_MMU_VER3_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER3_PTE /* ----G */
|
||||
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PTE__SIZE 8
|
||||
#define NV_MMU_CLIENT /* ----G */
|
||||
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
|
||||
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
|
||||
#endif // __gh100_dev_mmu_h__
|
||||
@@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
|
||||
int result, node;
|
||||
nv_kthread_q_t local_q;
|
||||
|
||||
// If the API does not support CPU affinity, check whether the correct
|
||||
// error code is returned.
|
||||
// Non-affinitized queue allocation has been verified by previous test
|
||||
// so just ensure that the affinitized version also works.
|
||||
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
|
||||
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
|
||||
TEST_CHECK_RET(result == -ENOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for_each_online_node(node) {
|
||||
unsigned i;
|
||||
const unsigned max_i = 100;
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
||||
@@ -52,6 +52,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
|
||||
@@ -66,6 +67,12 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
|
||||
@@ -88,7 +95,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
|
||||
|
||||
@@ -36,7 +36,7 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
@@ -67,17 +67,11 @@ endif
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
@@ -87,18 +81,19 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
@@ -107,6 +102,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -35,93 +35,95 @@
|
||||
#include "uvm_linux_ioctl.h"
|
||||
#include "uvm_hmm.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
||||
#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
|
||||
|
||||
static dev_t g_uvm_base_dev;
|
||||
static struct cdev g_uvm_cdev;
|
||||
static const struct file_operations uvm_fops;
|
||||
|
||||
// List of fault service contexts for CPU faults
|
||||
static LIST_HEAD(g_cpu_service_block_context_list);
|
||||
|
||||
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
|
||||
|
||||
NV_STATUS uvm_service_block_context_init(void)
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
unsigned num_preallocated_contexts = 4;
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
|
||||
{
|
||||
unsigned long uptr;
|
||||
uvm_fd_type_t type;
|
||||
void *ptr;
|
||||
|
||||
// Pre-allocate some fault service contexts for the CPU and add them to the global list
|
||||
while (num_preallocated_contexts-- > 0) {
|
||||
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
|
||||
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
|
||||
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
|
||||
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
|
||||
|
||||
switch (type) {
|
||||
case UVM_FD_UNINITIALIZED:
|
||||
case UVM_FD_INITIALIZING:
|
||||
UVM_ASSERT(!ptr);
|
||||
break;
|
||||
|
||||
case UVM_FD_VA_SPACE:
|
||||
UVM_ASSERT(ptr);
|
||||
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_service_block_context_exit(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context, *service_context_tmp;
|
||||
|
||||
// Free fault service contexts for the CPU and add clear the global list
|
||||
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
|
||||
cpu_fault.service_context_list) {
|
||||
uvm_kvfree(service_context);
|
||||
}
|
||||
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
// Get a fault service context from the global list or allocate a new one if there are no
|
||||
// available entries
|
||||
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context;
|
||||
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
|
||||
cpu_fault.service_context_list);
|
||||
|
||||
if (service_context)
|
||||
list_del(&service_context->cpu_fault.service_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
if (!service_context)
|
||||
service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
|
||||
return service_context;
|
||||
}
|
||||
|
||||
// Put a fault service context in the global list
|
||||
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
if (ptr_val)
|
||||
*ptr_val = ptr;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
|
||||
// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
|
||||
// lock will need to be taken.
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
|
||||
if (status == NV_OK) {
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_create(inode, filp);
|
||||
mapping = uvm_kvmalloc(sizeof(*mapping));
|
||||
if (!mapping)
|
||||
return -ENOMEM;
|
||||
|
||||
uvm_up_read(&g_uvm_global.pm.lock);
|
||||
}
|
||||
// By default all struct files on the same inode share the same
|
||||
// address_space structure (the inode's) across all processes. This means
|
||||
// unmap_mapping_range would unmap virtual mappings across all processes on
|
||||
// that inode.
|
||||
//
|
||||
// Since the UVM driver uses the mapping offset as the VA of the file's
|
||||
// process, we need to isolate the mappings to each process.
|
||||
address_space_init_once(mapping);
|
||||
mapping->host = inode;
|
||||
|
||||
return -nv_status_to_errno(status);
|
||||
// Some paths in the kernel, for example force_page_cache_readahead which
|
||||
// can be invoked from user-space via madvise MADV_WILLNEED and fadvise
|
||||
// POSIX_FADV_WILLNEED, check the function pointers within
|
||||
// file->f_mapping->a_ops for validity. However, those paths assume that a_ops
|
||||
// itself is always valid. Handle that by using the inode's a_ops pointer,
|
||||
// which is what f_mapping->a_ops would point to anyway if we weren't re-
|
||||
// assigning f_mapping.
|
||||
mapping->a_ops = inode->i_mapping->a_ops;
|
||||
|
||||
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
|
||||
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
|
||||
#endif
|
||||
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = mapping;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int uvm_open_entry(struct inode *inode, struct file *filp)
|
||||
@@ -147,9 +149,18 @@ static void uvm_release_deferred(void *data)
|
||||
|
||||
static int uvm_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_fd_type_t fd_type;
|
||||
int ret;
|
||||
|
||||
fd_type = uvm_fd_type(filp, (void **)&va_space);
|
||||
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
|
||||
if (fd_type == UVM_FD_UNINITIALIZED) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
return 0;
|
||||
}
|
||||
|
||||
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = NULL;
|
||||
|
||||
@@ -167,7 +178,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
|
||||
// been destroyed, and va_space->mapping won't be used again. Still,
|
||||
// the va_space survives the inode if its destruction is deferred, in
|
||||
// which case the references are rendered stale.
|
||||
address_space_init_once(&va_space->mapping);
|
||||
address_space_init_once(va_space->mapping);
|
||||
|
||||
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
|
||||
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
|
||||
@@ -430,14 +441,12 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
|
||||
static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_processor_id_t gpu_id;
|
||||
bool make_zombie = false;
|
||||
|
||||
if (current->mm != NULL)
|
||||
uvm_record_lock_mmap_lock_write(current->mm);
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
// current->mm will be NULL on process teardown, in which case we have
|
||||
// special handling.
|
||||
if (current->mm == NULL) {
|
||||
@@ -467,13 +476,11 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
|
||||
uvm_destroy_vma_managed(vma, make_zombie);
|
||||
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
|
||||
// that can be attributed to new VA ranges reallocated at the same address
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
|
||||
gpu_va_space->needs_fault_buffer_flush = true;
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to
|
||||
// avoid finding stale entries that can be attributed to new VA ranges
|
||||
// reallocated at the same address.
|
||||
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
|
||||
}
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
@@ -489,139 +496,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
|
||||
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_block_t *va_block;
|
||||
NvU64 fault_addr = nv_page_fault_va(vmf);
|
||||
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
bool tools_enabled;
|
||||
bool major_fault = false;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_global_processor_mask_t gpus_to_check_for_ecc;
|
||||
|
||||
if (status != NV_OK)
|
||||
goto convert_error;
|
||||
|
||||
// TODO: Bug 2583279: Lock tracking is disabled for the power management
|
||||
// lock in order to suppress reporting of a lock policy violation.
|
||||
// The violation consists in acquiring the power management lock multiple
|
||||
// times, and it is manifested as an error during release. The
|
||||
// re-acquisition of the power management locks happens upon re-entry in the
|
||||
// UVM module, and it is benign on itself, but when combined with certain
|
||||
// power management scenarios, it is indicative of a potential deadlock.
|
||||
// Tracking will be re-enabled once the power management locking strategy is
|
||||
// modified to avoid deadlocks.
|
||||
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
goto convert_error;
|
||||
}
|
||||
|
||||
service_context = uvm_service_block_context_cpu_alloc();
|
||||
if (!service_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
service_context->cpu_fault.wakeup_time_stamp = 0;
|
||||
|
||||
// The mmap_lock might be held in write mode, but the mode doesn't matter
|
||||
// for the purpose of lock ordering and we don't rely on it being in write
|
||||
// anywhere so just record it as read mode in all cases.
|
||||
uvm_record_lock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
do {
|
||||
bool do_sleep = false;
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
NvU64 now = NV_GETTIME();
|
||||
if (now < service_context->cpu_fault.wakeup_time_stamp)
|
||||
do_sleep = true;
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
// Drop the VA space lock while we sleep
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// usleep_range is preferred because msleep has a 20ms granularity
|
||||
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
|
||||
// timers and, by adding a range, the Linux scheduler may coalesce
|
||||
// our wakeup with others, thus saving some interrupts.
|
||||
if (do_sleep) {
|
||||
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
|
||||
|
||||
usleep_range(nap_us, nap_us + nap_us / 2);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
|
||||
break;
|
||||
}
|
||||
|
||||
// Watch out, current->mm might not be vma->vm_mm
|
||||
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
|
||||
|
||||
// Loop until thrashing goes away.
|
||||
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
|
||||
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UvmEventFatalReason reason;
|
||||
|
||||
reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
|
||||
}
|
||||
|
||||
tools_enabled = va_space->tools.enabled;
|
||||
|
||||
if (status == NV_OK) {
|
||||
uvm_va_space_global_gpus_in_mask(va_space,
|
||||
&gpus_to_check_for_ecc,
|
||||
&service_context->cpu_fault.gpus_to_check_for_ecc);
|
||||
uvm_global_mask_retain(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
|
||||
uvm_global_mask_release(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
if (tools_enabled)
|
||||
uvm_tools_flush_events();
|
||||
|
||||
// Major faults involve I/O in order to resolve the fault.
|
||||
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
|
||||
// A process can also get statistics for major and minor faults by calling readproc().
|
||||
major_fault = service_context->cpu_fault.did_migrate;
|
||||
uvm_service_block_context_cpu_free(service_context);
|
||||
|
||||
unlock:
|
||||
// TODO: Bug 2583279: See the comment above the matching lock acquisition
|
||||
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
|
||||
|
||||
convert_error:
|
||||
switch (status) {
|
||||
case NV_OK:
|
||||
case NV_ERR_BUSY_RETRY:
|
||||
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
|
||||
case NV_ERR_NO_MEMORY:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
|
||||
}
|
||||
|
||||
|
||||
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
|
||||
@@ -752,7 +630,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
|
||||
|
||||
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
int ret = 0;
|
||||
@@ -761,8 +639,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_initialized(va_space);
|
||||
if (status != NV_OK)
|
||||
va_space = uvm_fd_va_space(filp);
|
||||
if (!va_space)
|
||||
return -EBADFD;
|
||||
|
||||
// When the VA space is associated with an mm, all vmas under the VA space
|
||||
@@ -814,7 +692,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
// Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
|
||||
// so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
|
||||
// with VM_IO, but that causes other mapping issues.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
|
||||
// Make the default behavior be VM_DONTCOPY to avoid the performance impact
|
||||
// of removing CPU mappings in the parent on fork()+exec(). Users can call
|
||||
// madvise(MDV_DOFORK) if the child process requires access to the
|
||||
// allocation.
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY);
|
||||
|
||||
vma->vm_ops = &uvm_vm_ops_managed;
|
||||
|
||||
@@ -881,7 +763,53 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
|
||||
|
||||
static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
|
||||
{
|
||||
return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
|
||||
uvm_va_space_t *va_space;
|
||||
NV_STATUS status;
|
||||
uvm_fd_type_t old_fd_type;
|
||||
|
||||
// Normally we expect private_data == UVM_FD_UNINITIALIZED. However multiple
|
||||
// threads may call this ioctl concurrently so we have to be careful to
|
||||
// avoid initializing multiple va_spaces and/or leaking memory. To do this
|
||||
// we do an atomic compare and swap. Only one thread will observe
|
||||
// UVM_FD_UNINITIALIZED and that thread will allocate and setup the
|
||||
// va_space.
|
||||
//
|
||||
// Other threads will either see UVM_FD_INITIALIZING or UVM_FD_VA_SPACE. In
|
||||
// the case of UVM_FD_VA_SPACE we return success if and only if the
|
||||
// initialization flags match. If another thread is still initializing the
|
||||
// va_space we return NV_ERR_BUSY_RETRY.
|
||||
//
|
||||
// If va_space initialization fails we return the failure code and reset the
|
||||
// FD state back to UVM_FD_UNINITIALIZED to allow another initialization
|
||||
// attempt to be made. This is safe because other threads will have only had
|
||||
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
|
||||
// case.
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type == UVM_FD_UNINITIALIZED) {
|
||||
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
|
||||
if (status != NV_OK) {
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
|
||||
return status;
|
||||
}
|
||||
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
|
||||
}
|
||||
else if (old_fd_type == UVM_FD_VA_SPACE) {
|
||||
va_space = uvm_va_space_get(filp);
|
||||
|
||||
if (params->flags != va_space->initialization_flags)
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
else
|
||||
status = NV_OK;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
|
||||
@@ -978,16 +906,9 @@ static const struct file_operations uvm_fops =
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
|
||||
{
|
||||
long ret;
|
||||
int write = 1;
|
||||
int force = 0;
|
||||
struct page *page;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@@ -998,7 +919,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
// are not used because unload_state_buf may be a managed memory pointer and
|
||||
// therefore a locking assertion from the CPU fault handler could be fired.
|
||||
nv_mmap_read_lock(current->mm);
|
||||
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
|
||||
nv_mmap_read_unlock(current->mm);
|
||||
|
||||
if (ret < 0)
|
||||
@@ -1008,7 +929,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
put_page(page);
|
||||
NV_UNPIN_USER_PAGE(page);
|
||||
status = NV_ERR_IN_USE;
|
||||
goto error;
|
||||
}
|
||||
@@ -1027,7 +948,7 @@ static void uvm_test_unload_state_exit(void)
|
||||
{
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
kunmap(g_uvm_global.unload_state.page);
|
||||
put_page(g_uvm_global.unload_state.page);
|
||||
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1094,7 +1015,6 @@ static int uvm_init(void)
|
||||
if (uvm_enable_builtin_tests)
|
||||
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
|
||||
|
||||
|
||||
// After Open RM is released, both the enclosing "#if" and this comment
|
||||
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
|
||||
// check is both necessary and sufficient for reporting functionality.
|
||||
@@ -1104,7 +1024,6 @@ static int uvm_init(void)
|
||||
if (uvm_hmm_is_enabled_system_wide())
|
||||
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
@@ -1146,4 +1065,4 @@ module_exit(uvm_exit_entry);
|
||||
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
MODULE_INFO(supported, "external");
|
||||
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -211,18 +211,12 @@ NV_STATUS UvmDeinitialize(void);
|
||||
// UvmReopen
|
||||
//
|
||||
// Reinitializes the UVM driver after checking for minimal user-mode state.
|
||||
// Before calling this function, all GPUs must be unregistered with
|
||||
// Before calling this function, all GPUs must be unregistered with
|
||||
// UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree().
|
||||
// Note that it is not required to release VA ranges that were reserved with
|
||||
// UvmReserveVa().
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// replaces it with a new open file with the same name.
|
||||
//
|
||||
// Arguments:
|
||||
@@ -416,14 +410,6 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
// location will have their range group association changed to
|
||||
// UVM_RANGE_GROUP_ID_NONE.
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unregister.
|
||||
@@ -1276,14 +1262,6 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
//
|
||||
// The VA range can be unmapped and freed via a call to UvmFree.
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Arguments:
|
||||
// base: (INPUT)
|
||||
// Base address of the virtual address range.
|
||||
@@ -1320,10 +1298,6 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
|
||||
// or caching is requested on more than one GPU.
|
||||
|
||||
|
||||
|
||||
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The current process is not the one which called UvmInitialize, and
|
||||
@@ -1772,17 +1746,20 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// GPUs. The external allocation can be unmapped from a specific GPU using
|
||||
// UvmUnmapExternal or from all GPUs using UvmFree.
|
||||
//
|
||||
// The virtual address range specified by (base, length) must be aligned to the
|
||||
// allocation's physical page size and must fall within a VA range previously
|
||||
// created with UvmCreateExternalRange. A GPU VA space must have been registered
|
||||
// for each GPU in the list. The offset in the physical allocation at which the
|
||||
// allocation must be mapped should also be aligned to the allocation's physical
|
||||
// page size. The (base, length) range must lie within the largest possible
|
||||
// virtual address supported by the specified GPUs.
|
||||
// The virtual address range specified by (base, length) must fall within a VA
|
||||
// range previously created with UvmCreateExternalRange. A GPU VA space must
|
||||
// have been registered for each GPU in the list. The (base, length) range must
|
||||
// lie within the largest possible virtual address supported by the specified
|
||||
// GPUs.
|
||||
//
|
||||
// The page size used for the mapping is the largest supported page size less
|
||||
// than or equal to the alignments of base, length, offset, and the allocation
|
||||
// page size.
|
||||
//
|
||||
// If the range specified by (base, length) falls within any existing mappings,
|
||||
// the behavior is the same as if UvmUnmapExternal with the range specified by
|
||||
// (base, length) had been called first.
|
||||
// (base, length) had been called first, provided that base and length are
|
||||
// aligned to the page size used for the existing one.
|
||||
//
|
||||
// If the allocation resides in GPU memory, that GPU must have been registered
|
||||
// via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
|
||||
@@ -1864,8 +1841,9 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// - The requested address range does not fall entirely within an
|
||||
// existing external VA range created with a single call to
|
||||
// UvmCreateExternalRange.
|
||||
// - At least one of base and length is not aligned to the allocation's
|
||||
// physical page size.
|
||||
// - The mapping page size allowed by the alignments of base, length,
|
||||
// and offset is smaller than the minimum supported page size on the
|
||||
// GPU.
|
||||
// - base or base + length fall within an existing mapping but are not
|
||||
// aligned to that mapping's page size.
|
||||
//
|
||||
@@ -1874,8 +1852,7 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// address supported by one or more of the specified GPUs.
|
||||
//
|
||||
// NV_ERR_INVALID_OFFSET:
|
||||
// offset is not aligned to the allocation's physical page size or
|
||||
// offset+length exceeds the allocation size.
|
||||
// - offset+length exceeds the allocation size.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// One of the following occurred:
|
||||
@@ -3784,6 +3761,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// Read spans more than a single target process allocation.
|
||||
//
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
|
||||
void *buffer,
|
||||
|
||||
95
kernel-open/nvidia-uvm/uvm_ada.c
Normal file
95
kernel-open/nvidia-uvm/uvm_ada.c
Normal file
@@ -0,0 +1,95 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_ada_fault_buffer.h"
|
||||
|
||||
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
parent_gpu->tlb_batch.va_invalidate_supported = true;
|
||||
|
||||
parent_gpu->tlb_batch.va_range_invalidate_supported = true;
|
||||
|
||||
// TODO: Bug 1767241: Run benchmarks to figure out a good number
|
||||
parent_gpu->tlb_batch.max_ranges = 8;
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
|
||||
parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
|
||||
8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Ada covers 128 TB and that's the minimum size
|
||||
// that can be used.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
// Not all units on Ada support 49-bit addressing, including those which
|
||||
// access channel buffers.
|
||||
parent_gpu->max_channel_va = 1ULL << 40;
|
||||
|
||||
parent_gpu->max_host_va = 1ULL << 40;
|
||||
|
||||
// Ada can map sysmem with any page size
|
||||
parent_gpu->can_map_sysmem_with_large_pages = true;
|
||||
|
||||
// Prefetch instructions will generate faults
|
||||
parent_gpu->prefetch_fault_supported = true;
|
||||
|
||||
// Ada can place GPFIFO in vidmem
|
||||
parent_gpu->gpfifo_in_vidmem_supported = true;
|
||||
|
||||
parent_gpu->replayable_faults_supported = true;
|
||||
|
||||
parent_gpu->non_replayable_faults_supported = true;
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
|
||||
parent_gpu->has_clear_faulted_channel_sw_method = true;
|
||||
|
||||
parent_gpu->has_clear_faulted_channel_method = false;
|
||||
|
||||
parent_gpu->smc.supported = true;
|
||||
|
||||
parent_gpu->sparse_mappings_supported = true;
|
||||
|
||||
parent_gpu->map_remap_larger_page_promotion = false;
|
||||
|
||||
parent_gpu->plc_supported = true;
|
||||
}
|
||||
84
kernel-open/nvidia-uvm/uvm_ada_fault_buffer.h
Normal file
84
kernel-open/nvidia-uvm/uvm_ada_fault_buffer.h
Normal file
@@ -0,0 +1,84 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __UVM_HAL_ADA_FAULT_BUFFER_H__
|
||||
#define __UVM_HAL_ADA_FAULT_BUFFER_H__
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
// There are up to 6 TPCs per GPC in Ada, and there are 2 LTP uTLB per TPC.
|
||||
// Besides, there is one RGG uTLB per GPC. Each TPC has a number of clients
|
||||
// that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests from
|
||||
// these units are routed as follows to the 2 LTP uTLBs:
|
||||
//
|
||||
// -------- ---------
|
||||
// | T1_0 | -----------------> | uTLB0 |
|
||||
// -------- ---------
|
||||
//
|
||||
// -------- ---------
|
||||
// | T1_1 | -----------------> | uTLB1 |
|
||||
// -------- --------> ---------
|
||||
// | ^
|
||||
// ------- | |
|
||||
// | PE | ----------- |
|
||||
// ------- |
|
||||
// |
|
||||
// --------- |
|
||||
// | TPCCS | -----------------------
|
||||
// ---------
|
||||
//
|
||||
//
|
||||
// The client ids are local to their GPC and the id mapping is linear across
|
||||
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
|
||||
//
|
||||
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
|
||||
// be ignored. These will never be reported in a fault message, and should
|
||||
// never be used in an invalidate. Therefore, we define our own values.
|
||||
typedef enum {
|
||||
UVM_ADA_GPC_UTLB_ID_RGG = 0,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP0 = 1,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP1 = 2,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP2 = 3,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP3 = 4,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP4 = 5,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP5 = 6,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP6 = 7,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP7 = 8,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP8 = 9,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP9 = 10,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP10 = 11,
|
||||
UVM_ADA_GPC_UTLB_ID_LTP11 = 12,
|
||||
|
||||
UVM_ADA_GPC_UTLB_COUNT,
|
||||
} uvm_ada_gpc_utlb_id_t;
|
||||
|
||||
static NvU32 uvm_ada_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
|
||||
UVM_ASSERT(utlbs <= UVM_ADA_GPC_UTLB_COUNT);
|
||||
return utlbs;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-20221 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -53,7 +53,7 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = 132ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "clc7b5.h"
|
||||
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
|
||||
|
||||
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
@@ -112,7 +112,7 @@ NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
|
||||
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
NvU64 push_begin_gpu_va;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
@@ -183,7 +183,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -29,7 +29,7 @@
|
||||
#include "clc56f.h"
|
||||
#include "clc076.h"
|
||||
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
@@ -82,7 +82,7 @@ bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
@@ -107,6 +107,8 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
|
||||
uvm_spin_loop_t spin;
|
||||
NvU32 channel_faulted_mask = 0;
|
||||
NvU32 clear_type_value = 0;
|
||||
NvU32 doorbell_value = 0;
|
||||
volatile NvU32 *doorbell_ptr;
|
||||
|
||||
UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);
|
||||
|
||||
@@ -123,6 +125,12 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
|
||||
uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
|
||||
}
|
||||
|
||||
doorbell_ptr = (NvU32 *)((NvU8 *)user_channel->runlist_pri_base_register + NV_RUNLIST_INTERNAL_DOORBELL);
|
||||
|
||||
// GFID is not required since we clear faulted channel with a SW method on
|
||||
// SRIOV. On baremetal, GFID is always zero.
|
||||
doorbell_value = HWVALUE(_RUNLIST, INTERNAL_DOORBELL, CHID, user_channel->hw_channel_id);
|
||||
|
||||
// Wait for the channel to have the FAULTED bit set as this can race with
|
||||
// interrupt notification
|
||||
UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
|
||||
@@ -131,7 +139,7 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
|
||||
|
||||
wmb();
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
|
||||
UVM_GPU_WRITE_ONCE(*doorbell_ptr, doorbell_value);
|
||||
}
|
||||
|
||||
static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#define __UVM_API_H__
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_ioctl.h"
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_lock.h"
|
||||
@@ -51,8 +52,10 @@
|
||||
\
|
||||
params.rmStatus = uvm_global_get_status(); \
|
||||
if (params.rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params.rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params.rmStatus == NV_OK)) \
|
||||
params.rmStatus = function_name(¶ms, filp); \
|
||||
} \
|
||||
@@ -88,8 +91,10 @@
|
||||
\
|
||||
params->rmStatus = uvm_global_get_status(); \
|
||||
if (params->rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params->rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params->rmStatus == NV_OK)) \
|
||||
params->rmStatus = function_name(params, filp); \
|
||||
} \
|
||||
|
||||
@@ -58,12 +58,6 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
return uvm_ats_ibm_add_gpu(parent_gpu);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -77,12 +71,6 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
uvm_ats_ibm_remove_gpu(parent_gpu);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
@@ -100,10 +88,6 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -116,10 +100,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_unbind_gpu(gpu_va_space);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
@@ -147,10 +127,6 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if (status == NV_OK)
|
||||
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
|
||||
|
||||
@@ -173,10 +149,6 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
@@ -29,14 +29,8 @@
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "nv_uvm_types.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
|
||||
@@ -47,11 +41,6 @@ typedef struct
|
||||
{
|
||||
uvm_ibm_va_space_t ibm;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
} uvm_ats_va_space_t;
|
||||
|
||||
@@ -69,11 +58,6 @@ typedef struct
|
||||
{
|
||||
uvm_ibm_gpu_va_space_t ibm;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
} uvm_ats_gpu_va_space_t;
|
||||
|
||||
@@ -106,10 +90,6 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
// LOCKING: mmap_lock must be lockable.
|
||||
// VA space lock must be lockable.
|
||||
// gpu_va_space->gpu must be retained.
|
||||
|
||||
|
||||
|
||||
|
||||
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
|
||||
|
||||
@@ -25,9 +25,62 @@
|
||||
#include "uvm_ats_faults.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
|
||||
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
|
||||
// these experimental parameters. These are intended to help guide that policy.
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
|
||||
"Max order of pages (2^N) to prefetch on replayable ATS faults");
|
||||
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
|
||||
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
|
||||
|
||||
// Expand the fault region to the naturally-aligned region with order given by
|
||||
// the module parameters, clamped to the vma containing fault_addr (if any).
|
||||
// Note that this means the region contains fault_addr but may not begin at
|
||||
// fault_addr.
|
||||
static void expand_fault_region(struct mm_struct *mm,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_client_type_t client_type,
|
||||
unsigned long *start,
|
||||
unsigned long *size)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned int order;
|
||||
unsigned long outer, aligned_start, aligned_size;
|
||||
|
||||
*start = fault_addr;
|
||||
*size = PAGE_SIZE;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
|
||||
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
|
||||
else
|
||||
order = uvm_exp_perf_prefetch_ats_order_replayable;
|
||||
|
||||
if (order == 0)
|
||||
return;
|
||||
|
||||
vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
|
||||
if (!vma)
|
||||
return;
|
||||
|
||||
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
|
||||
|
||||
aligned_size = (1UL << order) * PAGE_SIZE;
|
||||
|
||||
aligned_start = fault_addr & ~(aligned_size - 1);
|
||||
|
||||
*start = max(vma->vm_start, aligned_start);
|
||||
outer = min(vma->vm_end, aligned_start + aligned_size);
|
||||
*size = outer - *start;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_access_type_t access_type)
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_fault_client_type_t client_type)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
@@ -66,8 +119,6 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = fault_addr,
|
||||
.length = PAGE_SIZE,
|
||||
.dst_id = gpu_va_space->gpu->parent->id,
|
||||
.dst_node_id = -1,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
@@ -79,6 +130,8 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
|
||||
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
//
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
@@ -131,7 +184,10 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
}
|
||||
else {
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
service_access_type,
|
||||
current_entry->fault_source.client_type);
|
||||
}
|
||||
|
||||
// Do not flag prefetch faults as fatal unless something fatal happened
|
||||
@@ -155,7 +211,8 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
UVM_FAULT_ACCESS_TYPE_READ);
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
current_entry->fault_source.client_type);
|
||||
|
||||
// If read accesses are also invalid, cancel the fault. If a
|
||||
// different error code is returned, exit
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "uvm_channel.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_tracker.h"
|
||||
@@ -52,13 +53,13 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
uvm_push_t push;
|
||||
bool is_proxy;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, &host_mem);
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
|
||||
memset(host_ptr, 0, CE_TEST_MEM_SIZE);
|
||||
|
||||
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
|
||||
status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, &mem[i]);
|
||||
status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
}
|
||||
|
||||
@@ -167,7 +168,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
uvm_push_t push;
|
||||
NvU32 value;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), &host_mem);
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
|
||||
*host_ptr = 0;
|
||||
@@ -429,13 +430,13 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by vidmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, &gpu_rm_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
|
||||
is_proxy_va_space = false;
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
|
||||
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, &sys_rm_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
|
||||
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);
|
||||
|
||||
@@ -655,9 +656,11 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
|
||||
|
||||
if (!skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "uvm_channel.h"
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_procfs.h"
|
||||
@@ -68,6 +69,30 @@ typedef enum
|
||||
UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
|
||||
} uvm_channel_update_mode_t;
|
||||
|
||||
static void channel_pool_lock_init(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
|
||||
else
|
||||
uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
|
||||
}
|
||||
|
||||
static void channel_pool_lock(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
else
|
||||
uvm_spin_lock(&pool->spinlock);
|
||||
}
|
||||
|
||||
static void channel_pool_unlock(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_unlock(&pool->mutex);
|
||||
else
|
||||
uvm_spin_unlock(&pool->spinlock);
|
||||
}
|
||||
|
||||
// Update channel progress, completing up to max_to_complete entries
|
||||
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 max_to_complete,
|
||||
@@ -80,12 +105,14 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
// Completed value should never exceed the queued value
|
||||
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
|
||||
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
|
||||
channel->pool->manager->gpu->parent->name, channel->name, completed_value,
|
||||
channel->pool->manager->gpu->parent->name,
|
||||
channel->name,
|
||||
completed_value,
|
||||
channel->tracking_sem.queued_value);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
@@ -97,15 +124,18 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
if (mode == UVM_CHANNEL_UPDATE_MODE_COMPLETED && entry->tracking_semaphore_value > completed_value)
|
||||
break;
|
||||
|
||||
uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
|
||||
list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
|
||||
if (entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
|
||||
uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
|
||||
list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
|
||||
}
|
||||
|
||||
gpu_get = (gpu_get + 1) % channel->num_gpfifo_entries;
|
||||
++completed_count;
|
||||
}
|
||||
|
||||
channel->gpu_get = gpu_get;
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
if (cpu_put >= gpu_get)
|
||||
pending_gpfifos = cpu_put - gpu_get;
|
||||
@@ -118,7 +148,8 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 uvm_channel_update_progress(uvm_channel_t *channel)
|
||||
{
|
||||
// By default, don't complete too many entries at a time to spread the cost
|
||||
// of doing so across callers and avoid holding a spin lock for too long.
|
||||
// of doing so across callers and avoid potentially holding a spin lock for
|
||||
// too long.
|
||||
return uvm_channel_update_progress_with_max(channel, 8, UVM_CHANNEL_UPDATE_MODE_COMPLETED);
|
||||
}
|
||||
|
||||
@@ -150,81 +181,95 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
|
||||
return pending_gpfifos;
|
||||
}
|
||||
|
||||
static bool channel_is_available(uvm_channel_t *channel)
|
||||
static NvU32 channel_get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 next_put;
|
||||
NvU32 available = channel->num_gpfifo_entries;
|
||||
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
next_put = (channel->cpu_put + channel->current_pushes_count + 1) % channel->num_gpfifo_entries;
|
||||
// Remove sentinel entry
|
||||
available -= 1;
|
||||
|
||||
return (next_put != channel->gpu_get);
|
||||
// Remove entries of ongoing pushes
|
||||
available -= channel->current_gpfifo_count;
|
||||
|
||||
// Remove pending entries
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
available -= (channel->cpu_put - channel->gpu_get);
|
||||
else
|
||||
available -= (channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get);
|
||||
|
||||
UVM_ASSERT(available < channel->num_gpfifo_entries);
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
static bool try_claim_channel(uvm_channel_t *channel)
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 available;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
available = channel_get_available_gpfifo_entries(channel);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
bool claimed = false;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
UVM_ASSERT(num_gpfifo_entries > 0);
|
||||
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
|
||||
|
||||
if (channel_is_available(channel)) {
|
||||
++channel->current_pushes_count;
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (channel_get_available_gpfifo_entries(channel) >= num_gpfifo_entries) {
|
||||
channel->current_gpfifo_count += num_gpfifo_entries;
|
||||
claimed = true;
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
|
||||
return claimed;
|
||||
}
|
||||
|
||||
static void lock_push(uvm_channel_t *channel)
|
||||
static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
bool claimed;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
claimed = try_claim_channel_locked(channel, num_gpfifo_entries);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
|
||||
|
||||
return claimed;
|
||||
}
|
||||
|
||||
static void unlock_push(uvm_channel_t *channel)
|
||||
static void unlock_channel_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
static bool trylock_push(uvm_channel_t *channel)
|
||||
static bool is_channel_locked_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
// For CE and proxy channels, we always return that the channel is locked,
|
||||
// which has no functional impact in the UVM channel code-flow, this is only
|
||||
// used on UVM_ASSERTs.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reserve a channel in the specified pool
|
||||
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
// Reserve a channel in the specified CE pool
|
||||
static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_t *channel;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
UVM_ASSERT(uvm_channel_pool_is_ce(pool));
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
// TODO: Bug 1764953: Prefer idle/less busy channels
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if (trylock_push(channel)) {
|
||||
if (try_claim_channel(channel)) {
|
||||
*channel_out = channel;
|
||||
return NV_OK;
|
||||
}
|
||||
else {
|
||||
unlock_push(channel);
|
||||
}
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
return NV_OK;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,8 +280,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
if (try_claim_channel(channel)) {
|
||||
lock_push(channel);
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
|
||||
return NV_OK;
|
||||
@@ -256,8 +300,12 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
|
||||
{
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
|
||||
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
|
||||
|
||||
UVM_ASSERT(pool != NULL);
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
@@ -273,7 +321,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
|
||||
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
return channel_reserve_in_pool(pool, channel_out);
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
|
||||
@@ -297,14 +345,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_info_t *push_info;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
|
||||
UVM_ASSERT(push_info != NULL);
|
||||
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
|
||||
list_del(&push_info->available_list_node);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return push_info - channel->push_infos;
|
||||
}
|
||||
@@ -319,12 +367,7 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
|
||||
manager = channel->pool->manager;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_ASSERT(is_channel_locked_for_push(channel));
|
||||
|
||||
status = uvm_pushbuffer_begin_push(manager->pushbuffer, push);
|
||||
if (status != NV_OK)
|
||||
@@ -370,10 +413,6 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_proxy(channel));
|
||||
|
||||
// nvUvmInterfacePagingChannelPushStream should not sleep, because a
|
||||
// spinlock is currently held.
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
|
||||
status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);
|
||||
|
||||
if (status != NV_OK) {
|
||||
@@ -407,10 +446,6 @@ static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va,
|
||||
|
||||
if (uvm_channel_is_ce(push->channel))
|
||||
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
|
||||
|
||||
|
||||
|
||||
|
||||
else
|
||||
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
|
||||
}
|
||||
@@ -428,7 +463,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
new_tracking_value = ++channel->tracking_sem.queued_value;
|
||||
new_payload = (NvU32)new_tracking_value;
|
||||
@@ -447,9 +482,10 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
entry->pushbuffer_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
|
||||
entry->pushbuffer_size = push_size;
|
||||
entry->push_info = &channel->push_infos[push->push_info_index];
|
||||
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
|
||||
|
||||
UVM_ASSERT(channel->current_pushes_count > 0);
|
||||
--channel->current_pushes_count;
|
||||
UVM_ASSERT(channel->current_gpfifo_count > 0);
|
||||
--channel->current_gpfifo_count;
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
proxy_channel_submit_work(push, push_size);
|
||||
@@ -464,8 +500,8 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
|
||||
// to be called first.
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
unlock_push(channel);
|
||||
unlock_channel_for_push(channel);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
@@ -477,27 +513,122 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
push->channel_tracking_value = new_tracking_value;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel)
|
||||
// The caller must submit a normal GPFIFO entry with a semaphore release
|
||||
// following the control GPFIFO, refer to uvm_channel_write_ctrl_gpfifo() for an
|
||||
// example.
|
||||
static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
|
||||
{
|
||||
uvm_gpfifo_entry_t *entry;
|
||||
NvU64 *gpfifo_entry;
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
|
||||
|
||||
entry = &channel->gpfifo_entries[cpu_put];
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
entry->type = UVM_GPFIFO_ENTRY_TYPE_CONTROL;
|
||||
|
||||
// Control GPFIFO entries are followed by a semaphore_release push in UVM.
|
||||
// We add the tracking semaphore value of the next GPFIFO entry,
|
||||
// potentially the associated semaphore release push. Even if a different
|
||||
// GPFIFO entry sneaks in, the purposes of signaling that this control
|
||||
// GPFIFO entry has been processed is accomplished.
|
||||
entry->tracking_semaphore_value = channel->tracking_sem.queued_value + 1;
|
||||
|
||||
UVM_ASSERT(channel->current_gpfifo_count > 1);
|
||||
--channel->current_gpfifo_count;
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
|
||||
*gpfifo_entry = ctrl_fifo_entry_value;
|
||||
|
||||
// Need to make sure all the GPFIFO entries writes complete before updating
|
||||
// GPPUT. We also don't want any reads to be moved after the GPPut write as
|
||||
// the GPU might modify the data they read as soon as the GPPut write
|
||||
// happens.
|
||||
mb();
|
||||
|
||||
gpu->parent->host_hal->write_gpu_put(channel, new_cpu_put);
|
||||
|
||||
channel->cpu_put = new_cpu_put;
|
||||
|
||||
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that. Note that we do not call
|
||||
// unlock_channel_for_push() because a control GPFIFO is followed by a
|
||||
// semaphore release, where the channel is unlocked.
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
// performance problems, e.g. bugs 626179, 593841. This may be related to
|
||||
// bug 124888, which GL works around by doing a clflush"
|
||||
wmb();
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_push_t push;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
// We reserve two GPFIFO entries, i.e., the control GPFIFO entry and the
|
||||
// subsequent semaphore_release push. There is, however, a potential case
|
||||
// for GPFIFO control submission starvation. This may happen because a
|
||||
// GPFIFO control submission requires two available GPFIFO entries. If you
|
||||
// have a full GPFIFO ring buffer that frees one entry at a time, while
|
||||
// there is another thread consuming this recently released entry at the
|
||||
// same rate, a concurrent thread trying to reserve two entries for a GPFIFO
|
||||
// control submission may starve. We could handle this by imposing minimal
|
||||
// release entries in uvm_channel.c:uvm_channel_update_progress(). Instead,
|
||||
// we don't deal with this potential starvation case because:
|
||||
// - Control GPFIFO are rarely used.
|
||||
// - By default, we release up to 8 GPFIFO entries at a time, except if the
|
||||
// release rate is constrained by lengthy pushbuffers -- another rare
|
||||
// situation.
|
||||
// - It would add unnecessary complexity to channel_update_progress().
|
||||
status = uvm_channel_reserve(channel, 2);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
write_ctrl_gpfifo(channel, ctrl_fifo_entry_value);
|
||||
|
||||
status = uvm_push_begin_on_reserved_channel(channel, &push, "write_ctrl_GPFIFO");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
// This is an empty push, the push's embedded semaphore_release signals that
|
||||
// the GPFIFO control entry has been processed.
|
||||
uvm_push_end(&push);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
if (try_claim_channel(channel))
|
||||
goto out;
|
||||
if (try_claim_channel(channel, num_gpfifo_entries))
|
||||
return NV_OK;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
uvm_spin_loop_init(&spin);
|
||||
while (!try_claim_channel(channel) && status == NV_OK) {
|
||||
while (!try_claim_channel(channel, num_gpfifo_entries) && status == NV_OK) {
|
||||
UVM_SPIN_LOOP(&spin);
|
||||
status = uvm_channel_check_errors(channel);
|
||||
uvm_channel_update_progress(channel);
|
||||
}
|
||||
|
||||
out:
|
||||
if (status == NV_OK)
|
||||
lock_push(channel);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -511,12 +642,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
if (pending_count == 0)
|
||||
return NULL;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->gpu_get != channel->cpu_put)
|
||||
entry = &channel->gpfifo_entries[channel->gpu_get];
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return entry;
|
||||
}
|
||||
@@ -568,12 +699,28 @@ NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel)
|
||||
|
||||
fatal_entry = uvm_channel_get_fatal_entry(channel);
|
||||
if (fatal_entry != NULL) {
|
||||
uvm_push_info_t *push_info = fatal_entry->push_info;
|
||||
UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n",
|
||||
push_info->description, push_info->filename, push_info->line, push_info->function);
|
||||
if (fatal_entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
|
||||
uvm_push_info_t *push_info = fatal_entry->push_info;
|
||||
UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n",
|
||||
push_info->description,
|
||||
push_info->filename,
|
||||
push_info->line,
|
||||
push_info->function);
|
||||
}
|
||||
else {
|
||||
NvU64 *gpfifo_entry;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + (fatal_entry - channel->gpfifo_entries);
|
||||
UVM_ERR_PRINT("Channel error likely caused by GPFIFO control entry, data: 0x%llx, gpu_get: %d\n",
|
||||
*gpfifo_entry,
|
||||
channel->gpu_get);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_global_set_fatal_error(status);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -608,40 +755,6 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
|
||||
return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(pool->num_channels > 0);
|
||||
@@ -658,19 +771,15 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
|
||||
}
|
||||
|
||||
uvm_procfs_destroy_entry(channel->procfs.pushes);
|
||||
uvm_procfs_destroy_entry(channel->procfs.info);
|
||||
uvm_procfs_destroy_entry(channel->procfs.dir);
|
||||
proc_remove(channel->procfs.pushes);
|
||||
proc_remove(channel->procfs.info);
|
||||
proc_remove(channel->procfs.dir);
|
||||
|
||||
uvm_kvfree(channel->push_acquire_infos);
|
||||
uvm_kvfree(channel->push_infos);
|
||||
|
||||
uvm_kvfree(channel->gpfifo_entries);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle));
|
||||
else
|
||||
@@ -692,27 +801,17 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
uvm_gpu_t *gpu = manager->gpu;
|
||||
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
|
||||
channel_alloc_params.numGpFifoEntries = manager->conf.num_gpfifo_entries;
|
||||
channel_alloc_params.gpFifoLoc = manager->conf.gpfifo_loc;
|
||||
channel_alloc_params.gpPutLoc = manager->conf.gpput_loc;
|
||||
channel_alloc_params.engineIndex = engine_index;
|
||||
|
||||
if (uvm_channel_is_ce(channel))
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space,
|
||||
&channel_alloc_params,
|
||||
@@ -732,11 +831,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
channel_info->hwChannelId,
|
||||
channel_info->hwRunlistId,
|
||||
channel_info->hwChannelId,
|
||||
|
||||
|
||||
|
||||
"CE",
|
||||
|
||||
engine_index);
|
||||
|
||||
return NV_OK;
|
||||
@@ -803,12 +898,6 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
channel->num_gpfifo_entries = manager->conf.num_gpfifo_entries;
|
||||
channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries);
|
||||
if (channel->gpfifo_entries == NULL) {
|
||||
@@ -854,12 +943,29 @@ NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaph
|
||||
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, uvm_channel_is_proxy(access_channel));
|
||||
}
|
||||
|
||||
static NV_STATUS init_channel(uvm_channel_t *channel)
|
||||
static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NV_STATUS status = uvm_push_begin_on_channel(channel, &push, "Init channel");
|
||||
NV_STATUS status;
|
||||
NvU32 num_entries = 1;
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu))
|
||||
num_entries++;
|
||||
|
||||
status = uvm_channel_reserve(channel, num_entries);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
|
||||
NvU64 gpfifo_entry;
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
gpu->parent->host_hal->set_gpfifo_pushbuffer_segment_base(&gpfifo_entry,
|
||||
uvm_pushbuffer_get_gpu_va_base(pushbuffer));
|
||||
write_ctrl_gpfifo(channel, gpfifo_entry);
|
||||
}
|
||||
|
||||
status = uvm_push_begin_on_reserved_channel(channel, &push, "Init channel");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
@@ -868,10 +974,6 @@ static NV_STATUS init_channel(uvm_channel_t *channel)
|
||||
if (uvm_channel_is_ce(channel))
|
||||
gpu->parent->ce_hal->init(&push);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
gpu->parent->host_hal->init(&push);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
@@ -925,9 +1027,10 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
pool->engine_index = engine_index;
|
||||
pool->pool_type = pool_type;
|
||||
|
||||
uvm_spin_lock_init(&pool->lock, UVM_LOCK_ORDER_CHANNEL);
|
||||
channel_pool_lock_init(pool);
|
||||
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
pool->channels = uvm_kvmalloc_zero(sizeof(*pool->channels) * num_channels);
|
||||
if (!pool->channels)
|
||||
@@ -942,7 +1045,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
status = init_channel(channel);
|
||||
status = channel_init(channel);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
@@ -1322,15 +1425,55 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
manager->conf.gpput_loc = string_to_buffer_location(gpput_loc_value);
|
||||
}
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as the
|
||||
// preferred CE for any type, because as more information is discovered (for
|
||||
// example, a pair of peer GPUs is added) we may start using the previously idle
|
||||
// channels.
|
||||
// Returns the maximum number of pools that are needed in the current
|
||||
// configuration. The implementation may choose to create a smaller number of
|
||||
// pools.
|
||||
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
unsigned num_channel_pools;
|
||||
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
// Create one CE channel pool per usable CE
|
||||
num_channel_pools = num_used_ce;
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
return num_channel_pools;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
unsigned ce;
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as
|
||||
// the preferred CE for any type, because as more information is discovered
|
||||
// (for example, a pair of peer GPUs is added) we may start using the
|
||||
// previously idle pools.
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
NV_STATUS status;
|
||||
unsigned type;
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
if (preferred_ce[type] == ce)
|
||||
manager->pool_to_use.default_for_type[type] = pool;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
NV_STATUS status;
|
||||
unsigned ce, type;
|
||||
unsigned num_channel_pools;
|
||||
uvm_channel_type_t type;
|
||||
unsigned max_channel_pools;
|
||||
unsigned preferred_ce[UVM_CHANNEL_TYPE_CE_COUNT];
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
@@ -1341,42 +1484,21 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// CE channel pools
|
||||
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
max_channel_pools = channel_manager_get_max_pools(manager);
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools);
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * max_channel_pools);
|
||||
if (!manager->channel_pools)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
// Assign channel types to pools
|
||||
for (type = 0; type < ARRAY_SIZE(preferred_ce); type++) {
|
||||
unsigned ce = preferred_ce[type];
|
||||
|
||||
UVM_ASSERT(test_bit(ce, manager->ce_mask));
|
||||
|
||||
manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
|
||||
}
|
||||
status = channel_manager_create_ce_pools(manager, preferred_ce);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// In SR-IOV heavy, add an additional, single-channel, pool that is
|
||||
// dedicated to the MEMOPS type.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) {
|
||||
uvm_channel_type_t channel_type = uvm_channel_proxy_channel_type();
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE_PROXY, preferred_ce[channel_type], &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@@ -1384,17 +1506,6 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
manager->pool_to_use.default_for_type[channel_type] = pool;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -1447,11 +1558,11 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
|
||||
if (channel_manager == NULL)
|
||||
return;
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.pending_pushes);
|
||||
proc_remove(channel_manager->procfs.pending_pushes);
|
||||
|
||||
channel_manager_destroy_pools(channel_manager);
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.channels_dir);
|
||||
proc_remove(channel_manager->procfs.channels_dir);
|
||||
|
||||
uvm_pushbuffer_destroy(channel_manager->pushbuffer);
|
||||
|
||||
@@ -1520,39 +1631,25 @@ uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 poo
|
||||
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type)
|
||||
{
|
||||
|
||||
|
||||
|
||||
BUILD_BUG_ON(UVM_CHANNEL_TYPE_COUNT != 5);
|
||||
|
||||
|
||||
switch (channel_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_CPU_TO_GPU);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_CPU);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_INTERNAL);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_MEMOPS);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_GPU);
|
||||
|
||||
|
||||
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type)
|
||||
{
|
||||
|
||||
|
||||
|
||||
BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 2);
|
||||
|
||||
|
||||
switch (channel_pool_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE);
|
||||
UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE_PROXY);
|
||||
|
||||
|
||||
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -1562,7 +1659,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
|
||||
@@ -1574,7 +1671,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
|
||||
@@ -1618,7 +1715,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
|
||||
@@ -1630,28 +1727,43 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
if (entry->tracking_semaphore_value + finished_pushes_count <= completed_value)
|
||||
continue;
|
||||
|
||||
// Obtain the value acquire tracking information from the push_info index
|
||||
if (uvm_push_info_is_tracking_acquires()) {
|
||||
NvU32 push_info_index = push_info - channel->push_infos;
|
||||
UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);
|
||||
if (entry->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
|
||||
NvU64 *gpfifo_entry;
|
||||
|
||||
push_acquire_info = &channel->push_acquire_infos[push_info_index];
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + gpu_get;
|
||||
UVM_SEQ_OR_DBG_PRINT(seq,
|
||||
" control GPFIFO entry - data: 0x%llx, gpu_get: %d\n",
|
||||
*gpfifo_entry,
|
||||
gpu_get);
|
||||
}
|
||||
else {
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(seq,
|
||||
" %s push '%s' started at %s:%d in %s() releasing value %llu%s",
|
||||
entry->tracking_semaphore_value <= completed_value ? "finished" : "pending",
|
||||
push_info->description,
|
||||
push_info->filename,
|
||||
push_info->line,
|
||||
push_info->function,
|
||||
entry->tracking_semaphore_value,
|
||||
!push_acquire_info || push_acquire_info->num_values == 0? "\n" : "");
|
||||
// Obtain the value acquire tracking information from the push_info
|
||||
// index
|
||||
if (uvm_push_info_is_tracking_acquires()) {
|
||||
NvU32 push_info_index = push_info - channel->push_infos;
|
||||
UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);
|
||||
|
||||
if (push_acquire_info)
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
push_acquire_info = &channel->push_acquire_infos[push_info_index];
|
||||
}
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(seq,
|
||||
" %s push '%s' started at %s:%d in %s() releasing value %llu%s",
|
||||
entry->tracking_semaphore_value <= completed_value ? "finished" : "pending",
|
||||
push_info->description,
|
||||
push_info->filename,
|
||||
push_info->line,
|
||||
push_info->function,
|
||||
entry->tracking_semaphore_value,
|
||||
!push_acquire_info || push_acquire_info->num_values == 0 ? "\n" : "");
|
||||
|
||||
if (push_acquire_info)
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)
|
||||
@@ -1694,7 +1806,7 @@ static int nv_procfs_read_manager_pending_pushes(struct seq_file *s, void *v)
|
||||
uvm_channel_manager_t *manager = (uvm_channel_manager_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
channel_manager_print_pending_pushes(manager, s);
|
||||
|
||||
@@ -1733,7 +1845,7 @@ static int nv_procfs_read_channel_info(struct seq_file *s, void *v)
|
||||
uvm_channel_t *channel = (uvm_channel_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
uvm_channel_print_info(channel, s);
|
||||
|
||||
@@ -1754,7 +1866,7 @@ static int nv_procfs_read_channel_pushes(struct seq_file *s, void *v)
|
||||
uvm_channel_t *channel = (uvm_channel_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
// Include up to 5 finished pushes for some context
|
||||
channel_print_pushes(channel, 5, s);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -50,6 +50,9 @@
|
||||
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
|
||||
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
|
||||
|
||||
// Maximum number of channels per pool.
|
||||
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL 8
|
||||
|
||||
// Semaphore payloads cannot advance too much between calls to
|
||||
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
|
||||
// are bound by gpfifo sizing as we have to update the completed value to
|
||||
@@ -61,6 +64,14 @@
|
||||
// uvm_channel.h includes uvm_gpu_semaphore.h.
|
||||
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
|
||||
|
||||
#define uvm_channel_pool_assert_locked(pool) ( \
|
||||
{ \
|
||||
if (uvm_channel_pool_is_proxy(pool)) \
|
||||
uvm_assert_mutex_locked(&(pool)->mutex); \
|
||||
else \
|
||||
uvm_assert_spinlock_locked(&(pool)->spinlock); \
|
||||
})
|
||||
|
||||
// Channel types
|
||||
typedef enum
|
||||
{
|
||||
@@ -83,18 +94,7 @@ typedef enum
|
||||
|
||||
// ^^^^^^
|
||||
// Channel types backed by a CE.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
|
||||
|
||||
} uvm_channel_type_t;
|
||||
|
||||
typedef enum
|
||||
@@ -112,34 +112,43 @@ typedef enum
|
||||
// There is a single proxy pool and channel per GPU.
|
||||
UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_CHANNEL_POOL_TYPE_COUNT = 2,
|
||||
|
||||
|
||||
// A mask used to select pools of any type.
|
||||
UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
|
||||
} uvm_channel_pool_type_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Push-based GPFIFO entry
|
||||
UVM_GPFIFO_ENTRY_TYPE_NORMAL,
|
||||
|
||||
// Control GPFIFO entry, i.e., the LENGTH field is zero, not associated with
|
||||
// a push.
|
||||
UVM_GPFIFO_ENTRY_TYPE_CONTROL
|
||||
} uvm_gpfifo_entry_type_t;
|
||||
|
||||
struct uvm_gpfifo_entry_struct
|
||||
{
|
||||
// Offset of the pushbuffer in the pushbuffer allocation used by this entry
|
||||
uvm_gpfifo_entry_type_t type;
|
||||
|
||||
// Channel tracking semaphore value that indicates completion of
|
||||
// this entry.
|
||||
NvU64 tracking_semaphore_value;
|
||||
|
||||
// The following fields are only valid when type is
|
||||
// UVM_GPFIFO_ENTRY_TYPE_NORMAL.
|
||||
|
||||
// Offset of the pushbuffer in the pushbuffer allocation used by
|
||||
// this entry.
|
||||
NvU32 pushbuffer_offset;
|
||||
|
||||
// Size of the pushbuffer used for this entry
|
||||
// Size of the pushbuffer used for this entry.
|
||||
NvU32 pushbuffer_size;
|
||||
|
||||
// List node used by the pushbuffer tracking
|
||||
struct list_head pending_list_node;
|
||||
|
||||
// Channel tracking semaphore value that indicates completion of this entry
|
||||
NvU64 tracking_semaphore_value;
|
||||
|
||||
// Push info for the pending push that used this GPFIFO entry
|
||||
uvm_push_info_t *push_info;
|
||||
};
|
||||
@@ -164,8 +173,25 @@ typedef struct
|
||||
// Pool type: Refer to the uvm_channel_pool_type_t enum.
|
||||
uvm_channel_pool_type_t pool_type;
|
||||
|
||||
// Lock protecting the state of channels in the pool
|
||||
uvm_spinlock_t lock;
|
||||
// Lock protecting the state of channels in the pool.
|
||||
//
|
||||
// There are two pool lock types available: spinlock and mutex. The mutex
|
||||
// variant is required when the thread holding the pool lock must
|
||||
// sleep (ex: acquire another mutex) deeper in the call stack, either in UVM
|
||||
// or RM. For example, work submission to proxy channels in SR-IOV heavy
|
||||
// entails calling an RM API that acquires a mutex, so the proxy channel
|
||||
// pool must use the mutex variant.
|
||||
//
|
||||
// Unless the mutex is required, the spinlock is preferred. This is because,
|
||||
// other than for proxy channels, work submission takes little time and does
|
||||
// not involve any RM calls, so UVM can avoid any invocation that may result
|
||||
// on a sleep. All non-proxy channel pools use the spinlock variant, even in
|
||||
// SR-IOV heavy.
|
||||
union {
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
} uvm_channel_pool_t;
|
||||
|
||||
struct uvm_channel_struct
|
||||
@@ -193,10 +219,10 @@ struct uvm_channel_struct
|
||||
// for completion.
|
||||
NvU32 gpu_get;
|
||||
|
||||
// Number of currently on-going pushes on this channel
|
||||
// A new push is only allowed to begin on the channel if there is a free
|
||||
// GPFIFO entry for it.
|
||||
NvU32 current_pushes_count;
|
||||
// Number of currently on-going gpfifo entries on this channel
|
||||
// A new push or control GPFIFO is only allowed to begin on the channel if
|
||||
// there is a free GPFIFO entry for it.
|
||||
NvU32 current_gpfifo_count;
|
||||
|
||||
// Array of uvm_push_info_t for all pending pushes on the channel
|
||||
uvm_push_info_t *push_infos;
|
||||
@@ -211,30 +237,10 @@ struct uvm_channel_struct
|
||||
// been marked as completed.
|
||||
struct list_head available_push_infos;
|
||||
|
||||
// GPU tracking semaphore tracking the work in the channel
|
||||
// GPU tracking semaphore tracking the work in the channel.
|
||||
// Each push on the channel increments the semaphore, see
|
||||
// uvm_channel_end_push().
|
||||
uvm_gpu_tracking_semaphore_t tracking_sem;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// RM channel information
|
||||
union
|
||||
@@ -293,7 +299,7 @@ struct uvm_channel_manager_struct
|
||||
unsigned num_channel_pools;
|
||||
|
||||
// Mask containing the indexes of the usable Copy Engines. Each usable CE
|
||||
// has a pool associated with it, see channel_manager_ce_pool
|
||||
// has at least one pool associated with it.
|
||||
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
struct
|
||||
@@ -331,26 +337,30 @@ struct uvm_channel_manager_struct
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
return uvm_channel_pool_is_proxy(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
|
||||
return uvm_channel_pool_is_ce(channel->pool);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Proxy channels are used to push page tree related methods, so their channel
|
||||
// type is UVM_CHANNEL_TYPE_MEMOPS.
|
||||
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
|
||||
@@ -437,8 +447,8 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
|
||||
uvm_gpu_t *dst_gpu,
|
||||
uvm_channel_t **channel_out);
|
||||
|
||||
// Reserve a specific channel for a push
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel);
|
||||
// Reserve a specific channel for a push or for a control GPFIFO entry.
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
|
||||
|
||||
// Set optimal CE for P2P transfers between manager->gpu and peer
|
||||
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
|
||||
@@ -451,9 +461,24 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push);
|
||||
// Should be used by uvm_push_end() only.
|
||||
void uvm_channel_end_push(uvm_push_t *push);
|
||||
|
||||
// Write/send a control GPFIFO to channel. This is not supported by proxy
|
||||
// channels.
|
||||
// Ordering guarantees:
|
||||
// Input: Control GPFIFO entries are guaranteed to be processed by ESCHED after
|
||||
// all prior GPFIFO entries and pushbuffers have been fetched, but not
|
||||
// necessarily completed.
|
||||
// Output ordering: A caller can wait for this control entry to complete with
|
||||
// uvm_channel_manager_wait(), or by waiting for any later push in the same
|
||||
// channel to complete.
|
||||
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value);
|
||||
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
|
||||
|
||||
// Returns the number of available GPFIFO entries. The function internally
|
||||
// acquires the channel pool lock.
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
|
||||
|
||||
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -60,7 +60,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
TEST_CHECK_RET(gpu != NULL);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, &mem);
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
|
||||
@@ -153,7 +153,6 @@ done:
|
||||
|
||||
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
@@ -168,11 +167,12 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
completed_value = uvm_channel_update_completed_value(channel);
|
||||
uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);
|
||||
|
||||
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
|
||||
TEST_NV_CHECK_RET(uvm_global_get_status());
|
||||
uvm_channel_update_progress_all(channel);
|
||||
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
|
||||
|
||||
uvm_channel_manager_destroy(gpu->channel_manager);
|
||||
|
||||
// Destruction will hit the error again, so clear one more time.
|
||||
uvm_global_reset_fatal_error();
|
||||
|
||||
@@ -306,7 +306,6 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uvm_push_t push;
|
||||
@@ -481,12 +480,14 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
|
||||
0,
|
||||
&stream->counter_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
TEST_SNAPSHOT_SIZE(iterations_per_stream),
|
||||
0,
|
||||
&stream->counter_snapshots_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
@@ -495,6 +496,7 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
TEST_SNAPSHOT_SIZE(iterations_per_stream),
|
||||
0,
|
||||
&stream->other_stream_counter_snapshots_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
@@ -565,6 +567,7 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
stream->counter_snapshots_mem,
|
||||
i,
|
||||
stream->queued_counter_repeat);
|
||||
|
||||
// Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
|
||||
stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
|
||||
set_counter(&stream->push,
|
||||
@@ -669,61 +672,206 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager = gpu->channel_manager;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
continue;
|
||||
|
||||
// We submit 8x the channel's GPFIFO entries to force a few
|
||||
// complete loops in the GPFIFO circular buffer.
|
||||
for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
|
||||
NvU64 entry;
|
||||
gpu->parent->host_hal->set_gpfifo_noop(&entry);
|
||||
TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager = gpu->channel_manager;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
uvm_push_t push;
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
continue;
|
||||
|
||||
// We submit 8x the channel's GPFIFO entries to force a few
|
||||
// complete loops in the GPFIFO circular buffer.
|
||||
for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
|
||||
if (i % 2 == 0) {
|
||||
NvU64 entry;
|
||||
gpu->parent->host_hal->set_gpfifo_noop(&entry);
|
||||
TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
|
||||
}
|
||||
else {
|
||||
TEST_NV_CHECK_RET(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl and push test"));
|
||||
uvm_push_end(&push);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_wait(&push));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_channel_t *channel;
|
||||
uvm_rm_mem_t *mem;
|
||||
NvU32 *cpu_ptr;
|
||||
NvU64 gpu_va;
|
||||
NvU32 i;
|
||||
NvU64 entry;
|
||||
uvm_push_t push;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager = gpu->channel_manager;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
|
||||
cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
|
||||
gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);
|
||||
|
||||
*cpu_ptr = 0;
|
||||
|
||||
// This semaphore acquire takes 1 GPFIFO entries.
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_GPU, &push, "gpfifo ctrl tight test acq"),
|
||||
error);
|
||||
|
||||
channel = push.channel;
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
|
||||
uvm_push_end(&push);
|
||||
|
||||
// Flush all completed entries from the GPFIFO ring buffer. This test
|
||||
// requires this flush because we verify (below with
|
||||
// uvm_channel_get_available_gpfifo_entries) the number of free entries
|
||||
// in the channel.
|
||||
uvm_channel_update_progress_all(channel);
|
||||
|
||||
// Populate the remaining GPFIFO entries, leaving 2 slots available.
|
||||
// 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
|
||||
// indicate a terminal condition for the GPFIFO ringbuffer, therefore we
|
||||
// push num_gpfifo_entries-4.
|
||||
for (i = 0; i < channel->num_gpfifo_entries - 4; i++) {
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl tight test populate"), error);
|
||||
uvm_push_end(&push);
|
||||
}
|
||||
|
||||
TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);
|
||||
|
||||
// We should have room for the control GPFIFO and the subsequent
|
||||
// semaphore release.
|
||||
gpu->parent->host_hal->set_gpfifo_noop(&entry);
|
||||
TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);
|
||||
|
||||
// Release the semaphore.
|
||||
UVM_WRITE_ONCE(*cpu_ptr, 1);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
|
||||
|
||||
uvm_rm_mem_free(mem);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
uvm_rm_mem_free(mem);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// This test is inspired by the test_rc (above).
|
||||
// The test recreates the GPU's channel manager forcing its pushbuffer to be
|
||||
// mapped on a non-zero 1TB segment. This exercises work submission from
|
||||
// pushbuffers whose VAs are greater than 1TB.
|
||||
static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
if (!uvm_gpu_has_pushbuffer_segments(gpu))
|
||||
continue;
|
||||
|
||||
// The GPU channel manager pushbuffer is destroyed and then re-created
|
||||
// after testing a non-zero pushbuffer extension base, so this test
|
||||
// requires exclusive access to the GPU.
|
||||
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
|
||||
|
||||
gpu->uvm_test_force_upper_pushbuffer_segment = 1;
|
||||
uvm_channel_manager_destroy(gpu->channel_manager);
|
||||
TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
|
||||
gpu->uvm_test_force_upper_pushbuffer_segment = 0;
|
||||
|
||||
manager = gpu->channel_manager;
|
||||
TEST_CHECK_GOTO(uvm_pushbuffer_get_gpu_va_base(manager->pushbuffer) >= (1ull << 40), error);
|
||||
|
||||
// Submit a few pushes with the recently allocated
|
||||
// channel_manager->pushbuffer.
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
uvm_push_t push;
|
||||
|
||||
for (i = 0; i < channel->num_gpfifo_entries; i++) {
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "pushbuffer extension push test"),
|
||||
error);
|
||||
uvm_push_end(&push);
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
gpu->uvm_test_force_upper_pushbuffer_segment = 0;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
|
||||
{
|
||||
@@ -737,11 +885,23 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_write_ctrl_gpfifo_noop(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_write_ctrl_gpfifo_and_pushes(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_write_ctrl_gpfifo_tight(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
|
||||
|
||||
// The following tests have side effects, they reset the GPU's
|
||||
// channel_manager.
|
||||
status = test_channel_pushbuffer_extension_base(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
g_uvm_global.disable_fatal_error_assert = true;
|
||||
uvm_release_asserts_set_global_error_for_tests = true;
|
||||
@@ -767,7 +927,7 @@ done:
|
||||
static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (params->iterations == 0 || params->num_streams == 0)
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
@@ -782,10 +942,7 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
params->iterations,
|
||||
params->seed,
|
||||
params->verbose);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
|
||||
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
|
||||
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
|
||||
|
||||
bool uvm_debug_prints_enabled()
|
||||
bool uvm_debug_prints_enabled(void)
|
||||
{
|
||||
return uvm_debug_prints != 0;
|
||||
}
|
||||
|
||||
@@ -347,6 +347,21 @@ typedef struct
|
||||
NvHandle user_object;
|
||||
} uvm_rm_user_object_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING,
|
||||
UVM_FD_VA_SPACE,
|
||||
UVM_FD_COUNT
|
||||
} uvm_fd_type_t;
|
||||
|
||||
// This should be large enough to fit the valid values from uvm_fd_type_t above.
|
||||
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
|
||||
// coverage tool fails due when the preprocessor expands that to a huge mess of
|
||||
// ternary operators.
|
||||
#define UVM_FD_TYPE_BITS 2
|
||||
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
|
||||
|
||||
// Macro used to compare two values for types that support less than operator.
|
||||
// It returns -1 if a < b, 1 if a > b and 0 if a == 0
|
||||
#define UVM_CMP_DEFAULT(a,b) \
|
||||
@@ -369,6 +384,10 @@ typedef struct
|
||||
// file. A NULL input returns false.
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp);
|
||||
|
||||
// Returns the type of data filp->private_data contains to and if ptr_val !=
|
||||
// NULL returns the value of the pointer.
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);
|
||||
|
||||
// Reads the first word in the supplied struct page.
|
||||
static inline void uvm_touch_page(struct page *page)
|
||||
{
|
||||
|
||||
@@ -28,6 +28,8 @@ typedef struct uvm_global_struct uvm_global_t;
|
||||
|
||||
typedef struct uvm_gpu_struct uvm_gpu_t;
|
||||
typedef struct uvm_parent_gpu_struct uvm_parent_gpu_t;
|
||||
typedef struct uvm_gpu_chunk_struct uvm_gpu_chunk_t;
|
||||
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
|
||||
typedef struct uvm_rm_mem_struct uvm_rm_mem_t;
|
||||
typedef struct uvm_mem_struct uvm_mem_t;
|
||||
typedef struct uvm_host_hal_struct uvm_host_hal_t;
|
||||
@@ -35,9 +37,6 @@ typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
|
||||
typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
|
||||
typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
|
||||
typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
|
||||
|
||||
|
||||
|
||||
typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
|
||||
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
|
||||
@@ -59,6 +58,7 @@ typedef struct uvm_va_range_struct uvm_va_range_t;
|
||||
typedef struct uvm_va_block_struct uvm_va_block_t;
|
||||
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
|
||||
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
|
||||
typedef struct uvm_va_block_retry_struct uvm_va_block_retry_t;
|
||||
typedef struct uvm_va_space_struct uvm_va_space_t;
|
||||
typedef struct uvm_va_space_mm_struct uvm_va_space_mm_t;
|
||||
|
||||
|
||||
@@ -71,12 +71,10 @@ static void uvm_unregister_callbacks(void)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void sev_init(const UvmPlatformInfo *platform_info)
|
||||
{
|
||||
g_uvm_global.sev_enabled = platform_info->sevEnabled;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_init(void)
|
||||
{
|
||||
@@ -127,9 +125,7 @@ NV_STATUS uvm_global_init(void)
|
||||
uvm_ats_init(&platform_info);
|
||||
g_uvm_global.num_simulated_devices = 0;
|
||||
|
||||
|
||||
|
||||
|
||||
sev_init(&platform_info);
|
||||
|
||||
status = uvm_gpu_init();
|
||||
if (status != NV_OK) {
|
||||
|
||||
@@ -143,12 +143,11 @@ struct uvm_global_struct
|
||||
struct page *page;
|
||||
} unload_state;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
|
||||
// enabled. This field is set once during global initialization
|
||||
// (uvm_global_init), and can be read afterwards without acquiring any
|
||||
// locks.
|
||||
bool sev_enabled;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
@@ -187,12 +186,21 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
|
||||
g_uvm_global.parent_gpus[gpu_index] = NULL;
|
||||
}
|
||||
|
||||
// Get a parent gpu by its id.
|
||||
// Returns a pointer to the parent GPU object, or NULL if not found.
|
||||
//
|
||||
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
|
||||
// retained at least one of the child GPUs.
|
||||
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
|
||||
{
|
||||
return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
|
||||
}
|
||||
|
||||
// Get a gpu by its global id.
|
||||
// Returns a pointer to the GPU object, or NULL if not found.
|
||||
//
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -42,9 +42,6 @@
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_test.h"
|
||||
|
||||
|
||||
|
||||
|
||||
#include "uvm_linux.h"
|
||||
|
||||
#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
|
||||
@@ -95,12 +92,8 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
return UVM_GPU_LINK_NVLINK_2;
|
||||
case UVM_LINK_TYPE_NVLINK_3:
|
||||
return UVM_GPU_LINK_NVLINK_3;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
case UVM_LINK_TYPE_NVLINK_4:
|
||||
return UVM_GPU_LINK_NVLINK_4;
|
||||
default:
|
||||
return UVM_GPU_LINK_INVALID;
|
||||
}
|
||||
@@ -215,27 +208,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
|
||||
return parent_gpu->rm_info.subdeviceCount == 1;
|
||||
}
|
||||
|
||||
static bool parent_gpu_uses_canonical_form_address(uvm_parent_gpu_t *parent_gpu)
|
||||
static bool platform_uses_canonical_form_address(void)
|
||||
{
|
||||
NvU64 gpu_addr_shift;
|
||||
NvU64 cpu_addr_shift;
|
||||
|
||||
// PPC64LE doesn't use canonical form addresses.
|
||||
if (NVCPU_IS_PPC64LE)
|
||||
return false;
|
||||
|
||||
// We use big_page_size as UVM_PAGE_SIZE_64K because num_va_bits() is
|
||||
// big_page_size invariant in the MMU HAL.
|
||||
UVM_ASSERT(!parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K) ||
|
||||
(parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits() ==
|
||||
parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K)->num_va_bits()));
|
||||
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
|
||||
// Refer to the comments and diagram in uvm_gpu.c:uvm_gpu_can_address().
|
||||
return gpu_addr_shift >= cpu_addr_shift;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
@@ -244,6 +222,9 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// the canonical address form.
|
||||
NvU64 max_va_lower;
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
NvU8 addr_shift;
|
||||
|
||||
// Watch out for calling this too early in init
|
||||
UVM_ASSERT(gpu->address_space_tree.hal);
|
||||
@@ -251,6 +232,10 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
UVM_ASSERT(addr <= addr_end);
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
addr_shift = gpu_addr_shift;
|
||||
|
||||
// Pascal+ GPUs are capable of accessing kernel pointers in various modes
|
||||
// by applying the same upper-bit checks that x86, ARM, and Power
|
||||
// processors do. x86 and ARM use canonical form addresses. For ARM, even
|
||||
@@ -260,13 +245,15 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
|
||||
// (C) regions are only accessible by the CPU. Similarly, (G) regions
|
||||
// are only accessible by the GPU. (X) regions are not addressible.
|
||||
// Note that we only consider (V) regions, i.e., address ranges that are
|
||||
// addressable by both, the CPU and GPU.
|
||||
//
|
||||
// GPU MAX VA < CPU MAX VA GPU MAX VA >= CPU MAX VA
|
||||
// 0xF..F +----------------+ 0xF..F +----------------+
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| CPU MIN UPPER VA|----------------|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// GPU MIN UPPER VA|----------------| CPU MIN UPPER VA|----------------|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// CPU MIN UPPER VA|----------------| GPU MIN UPPER VA|----------------|
|
||||
@@ -275,32 +262,83 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// CPU MAX LOWER VA|----------------| GPU MAX LOWER VA|----------------|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// GPU MAX VA|----------------| CPU MAX LOWER VA|----------------|
|
||||
// GPU MAX LOWER VA|----------------| CPU MAX LOWER VA|----------------|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// 0 +----------------+ 0 +----------------+
|
||||
|
||||
if (parent_gpu_uses_canonical_form_address(gpu->parent)) {
|
||||
NvU64 min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - gpu->address_space_tree.hal->num_va_bits()));
|
||||
max_va_lower = 1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1);
|
||||
// On canonical form address platforms and Pascal+ GPUs.
|
||||
if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
|
||||
NvU64 min_va_upper;
|
||||
|
||||
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
|
||||
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
|
||||
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
|
||||
// See more details on uvm_parent_gpu_canonical_address(..);
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
else if (gpu_addr_shift == 57)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
|
||||
min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
|
||||
max_va_lower = 1ULL << (addr_shift - 1);
|
||||
return (addr_end < max_va_lower) || (addr >= min_va_upper);
|
||||
}
|
||||
else {
|
||||
max_va_lower = 1ULL << gpu->address_space_tree.hal->num_va_bits();
|
||||
max_va_lower = 1ULL << addr_shift;
|
||||
return addr_end < max_va_lower;
|
||||
}
|
||||
}
|
||||
|
||||
// The internal UVM VAS does not use canonical form addresses.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
{
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU64 max_gpu_va;
|
||||
|
||||
// Watch out for calling this too early in init
|
||||
UVM_ASSERT(gpu->address_space_tree.hal);
|
||||
UVM_ASSERT(gpu->address_space_tree.hal->num_va_bits() < 64);
|
||||
UVM_ASSERT(addr <= addr_end);
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
max_gpu_va = 1ULL << gpu->address_space_tree.hal->num_va_bits();
|
||||
return addr_end < max_gpu_va;
|
||||
}
|
||||
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
|
||||
{
|
||||
NvU32 gpu_va_bits;
|
||||
NvU32 shift;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
NvU8 addr_shift;
|
||||
NvU64 input_addr = addr;
|
||||
|
||||
if (parent_gpu_uses_canonical_form_address(parent_gpu)) {
|
||||
gpu_va_bits = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
shift = 64 - gpu_va_bits;
|
||||
addr = (NvU64)((NvS64)(addr << shift) >> shift);
|
||||
if (platform_uses_canonical_form_address()) {
|
||||
// When the CPU VA width is larger than GPU's, it means that:
|
||||
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
|
||||
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
|
||||
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
|
||||
// behavior of CPUs with smaller (than GPU) VA widths.
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
else if (gpu_addr_shift == 57)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
|
||||
addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
|
||||
|
||||
// This protection acts on when the address is not covered by the GPU's
|
||||
// OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
|
||||
// permissive (NO_CHECK) mode.
|
||||
if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
|
||||
return input_addr;
|
||||
}
|
||||
|
||||
return addr;
|
||||
@@ -356,11 +394,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
|
||||
|
||||
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 5);
|
||||
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 6);
|
||||
|
||||
switch (link_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
|
||||
@@ -368,10 +402,7 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -519,12 +550,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
|
||||
gpu_info_print_ce_caps(gpu, s);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -711,7 +736,7 @@ static NV_STATUS init_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.dir);
|
||||
proc_remove(parent_gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -739,8 +764,8 @@ static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.access_counters_file);
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.fault_stats_file);
|
||||
proc_remove(parent_gpu->procfs.access_counters_file);
|
||||
proc_remove(parent_gpu->procfs.fault_stats_file);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
@@ -791,9 +816,9 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
// The kernel waits on readers to finish before returning from those calls
|
||||
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_peers);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_symlink);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir);
|
||||
proc_remove(gpu->procfs.dir_peers);
|
||||
proc_remove(gpu->procfs.dir_symlink);
|
||||
proc_remove(gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
@@ -807,15 +832,15 @@ static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
|
||||
static void deinit_procfs_files(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.info_file);
|
||||
proc_remove(gpu->procfs.info_file);
|
||||
}
|
||||
|
||||
static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[1]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_file[1]);
|
||||
}
|
||||
|
||||
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
@@ -883,6 +908,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_parent_gpu_t **parent_gpu_out)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
NV_STATUS status;
|
||||
|
||||
parent_gpu = uvm_kvmalloc_zero(sizeof(*parent_gpu));
|
||||
if (!parent_gpu)
|
||||
@@ -899,11 +925,14 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
|
||||
uvm_rb_tree_init(&parent_gpu->tsg_table);
|
||||
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&parent_gpu->lazy_free_q, "vidmem lazy free"));
|
||||
|
||||
nv_kref_init(&parent_gpu->gpu_kref);
|
||||
|
||||
*parent_gpu_out = parent_gpu;
|
||||
|
||||
return NV_OK;
|
||||
return status;
|
||||
}
|
||||
|
||||
// Allocates a uvm_gpu_t struct and initializes the basic fields and leaves all
|
||||
@@ -1038,15 +1067,6 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
|
||||
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
|
||||
parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
|
||||
@@ -1208,16 +1228,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
status = init_procfs_files(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
@@ -1393,10 +1403,6 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
|
||||
// Sync all trackers in PMM
|
||||
uvm_pmm_gpu_sync(&gpu->pmm);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Remove all references to the given GPU from its parent, since it is being
|
||||
@@ -1530,13 +1536,6 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
if (free_parent)
|
||||
destroy_nvlink_peers(gpu);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO: Bug 2844714: If the parent is not being freed, the following
|
||||
// gpu_table_lock is only needed to protect concurrent
|
||||
// find_first_valid_gpu() in BH from the __clear_bit here. After
|
||||
@@ -1586,6 +1585,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
|
||||
UVM_ASSERT(parent_gpu->num_retained_gpus == 0);
|
||||
UVM_ASSERT(bitmap_empty(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS));
|
||||
|
||||
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
|
||||
|
||||
for (sub_processor_index = 0; sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
|
||||
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
|
||||
|
||||
@@ -2212,16 +2213,9 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|
||||
|
||||
|
||||
)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
@@ -2604,7 +2598,10 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
|
||||
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
|
||||
{
|
||||
// See comment in page_tree_set_location
|
||||
return uvm_gpu_is_virt_mode_sriov_heavy(gpu)? UVM_APERTURE_VID : UVM_APERTURE_DEFAULT;
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
}
|
||||
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
|
||||
@@ -3015,9 +3012,6 @@ NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -3056,9 +3050,6 @@ NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -3131,41 +3122,41 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_addr = dma_map_page(&gpu->parent->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&gpu->parent->pci_dev->dev, dma_addr))
|
||||
dma_addr = dma_map_page(&parent_gpu->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&parent_gpu->pci_dev->dev, dma_addr))
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
if (dma_addr < gpu->parent->dma_addressable_start ||
|
||||
dma_addr + size - 1 > gpu->parent->dma_addressable_limit) {
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_addr < parent_gpu->dma_addressable_start ||
|
||||
dma_addr + size - 1 > parent_gpu->dma_addressable_limit) {
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
UVM_ERR_PRINT_RL("PCI mapped range [0x%llx, 0x%llx) not in the addressable range [0x%llx, 0x%llx), GPU %s\n",
|
||||
dma_addr,
|
||||
dma_addr + (NvU64)size,
|
||||
gpu->parent->dma_addressable_start,
|
||||
gpu->parent->dma_addressable_limit + 1,
|
||||
uvm_gpu_name(gpu));
|
||||
parent_gpu->dma_addressable_start,
|
||||
parent_gpu->dma_addressable_limit + 1,
|
||||
parent_gpu->name);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
atomic64_add(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(gpu->parent, dma_addr);
|
||||
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size)
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
|
||||
{
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_address = gpu_addr_to_dma_addr(gpu->parent, dma_address);
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
// This function implements the UvmRegisterGpu API call, as described in uvm.h.
|
||||
|
||||
@@ -44,11 +44,9 @@
|
||||
#include "uvm_va_block_types.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_rb_tree.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "nv-kthread-q.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
|
||||
UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
|
||||
@@ -162,6 +160,12 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// State used by the VA block routines called by the servicing routine
|
||||
uvm_va_block_context_t block_context;
|
||||
|
||||
// Prefetch state hint
|
||||
uvm_perf_prefetch_hint_t prefetch_hint;
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
};
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
@@ -377,6 +381,17 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// determine at fetch time that all the access counter notifications in the
|
||||
// batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Scratch space, used to generate artificial physically addressed notifications.
|
||||
// Virtual address notifications are always aligned to 64k. This means up to 16
|
||||
// different physical locations could have been accessed to trigger one notification.
|
||||
// The sub-granularity mask can correspond to any of them.
|
||||
struct
|
||||
{
|
||||
uvm_processor_id_t resident_processors[16];
|
||||
uvm_gpu_phys_address_t phys_addresses[16];
|
||||
uvm_access_counter_buffer_entry_t phys_entry;
|
||||
} scratch;
|
||||
} virt;
|
||||
|
||||
struct
|
||||
@@ -508,10 +523,7 @@ typedef enum
|
||||
UVM_GPU_LINK_NVLINK_1,
|
||||
UVM_GPU_LINK_NVLINK_2,
|
||||
UVM_GPU_LINK_NVLINK_3,
|
||||
|
||||
|
||||
|
||||
|
||||
UVM_GPU_LINK_NVLINK_4,
|
||||
UVM_GPU_LINK_MAX
|
||||
} uvm_gpu_link_type_t;
|
||||
|
||||
@@ -684,10 +696,6 @@ struct uvm_gpu_struct
|
||||
// mappings (instead of kernel), and it is used in most configurations.
|
||||
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ECC handling
|
||||
// In order to trap ECC errors as soon as possible the driver has the hw
|
||||
// interrupt register mapped directly. If an ECC interrupt is ever noticed
|
||||
@@ -742,6 +750,9 @@ struct uvm_gpu_struct
|
||||
|
||||
// Placeholder for per-GPU performance heuristics information
|
||||
uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];
|
||||
|
||||
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
|
||||
bool uvm_test_force_upper_pushbuffer_segment;
|
||||
};
|
||||
|
||||
struct uvm_parent_gpu_struct
|
||||
@@ -823,9 +834,6 @@ struct uvm_parent_gpu_struct
|
||||
uvm_fault_buffer_hal_t *fault_buffer_hal;
|
||||
uvm_access_counter_buffer_hal_t *access_counter_buffer_hal;
|
||||
|
||||
|
||||
|
||||
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
// Virtualization mode of the GPU.
|
||||
@@ -949,6 +957,10 @@ struct uvm_parent_gpu_struct
|
||||
// NUMA info, mainly for ATS
|
||||
uvm_numa_info_t numa_info;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
|
||||
// Access counter buffer info. This is only valid if supports_access_counters is set to true
|
||||
uvm_access_counter_buffer_info_t access_counter_buffer_info;
|
||||
|
||||
@@ -1112,7 +1124,8 @@ struct uvm_gpu_peer_struct
|
||||
// deletion.
|
||||
NvHandle p2p_handle;
|
||||
|
||||
struct {
|
||||
struct
|
||||
{
|
||||
struct proc_dir_entry *peer_file[2];
|
||||
struct proc_dir_entry *peer_symlink_file[2];
|
||||
|
||||
@@ -1318,19 +1331,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
//
|
||||
// Returns the physical address of the pages that can be used to access them on
|
||||
// the GPU.
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
|
||||
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size);
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
|
||||
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
|
||||
{
|
||||
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
|
||||
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
|
||||
}
|
||||
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_gpu_t *gpu, NvU64 dma_address)
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
|
||||
{
|
||||
uvm_gpu_unmap_cpu_pages(gpu, dma_address, PAGE_SIZE);
|
||||
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Allocate and map a page of system DMA memory on the GPU for physical access
|
||||
@@ -1356,18 +1369,27 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
// Returns whether the given range is within the GPU's addressable VA ranges in
|
||||
// the internal GPU VA "kernel" address space, which is a linear address space.
|
||||
// Therefore, the input 'addr' must not be in canonical form, even platforms
|
||||
// that use to the canonical form addresses, i.e., ARM64, and x86.
|
||||
// Warning: This only checks whether the GPU's MMU can support the given
|
||||
// address. Some HW units on that GPU might only support a smaller range.
|
||||
//
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
// Returns addr's canonical form for host systems that use canonical form
|
||||
// addresses.
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
|
||||
|
||||
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
|
||||
{
|
||||
return gpu->parent->max_host_va > (1ull << 40);
|
||||
}
|
||||
|
||||
static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Eviction is supported only if the GPU supports replayable faults
|
||||
return gpu->parent->replayable_faults_supported;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -41,6 +41,10 @@
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
// granularity can be set to up to 16G, which would require an array too large
|
||||
@@ -934,25 +938,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
translate_virt_notifications_instance_ptrs(gpu, batch_context);
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
// TODO: Bug 1990466: Service virtual notifications. Entries with NULL
|
||||
// va_space are simply dropped.
|
||||
if (uvm_enable_builtin_tests) {
|
||||
NvU32 i;
|
||||
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
const bool on_managed = false;
|
||||
uvm_tools_broadcast_access_counter(gpu, batch_context->virt.notifications[i], on_managed);
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// GPA notifications provide a physical address and an aperture. Sort
|
||||
// accesses by aperture to try to coalesce operations on the same target
|
||||
// processor.
|
||||
@@ -1046,9 +1031,19 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
|
||||
}
|
||||
|
||||
// If the underlying VMA is gone, skip HMM migrations.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_find_vma(&service_context->block_context, address);
|
||||
if (status == NV_ERR_INVALID_ADDRESS)
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
|
||||
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
processor,
|
||||
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
|
||||
@@ -1158,7 +1153,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
bool *clear_counter)
|
||||
unsigned *out_flags)
|
||||
{
|
||||
size_t index;
|
||||
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
|
||||
@@ -1168,7 +1163,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
|
||||
gpu->id: UVM_ID_CPU;
|
||||
|
||||
*clear_counter = false;
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
UVM_ASSERT(num_reverse_mappings > 0);
|
||||
|
||||
@@ -1217,7 +1212,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
*clear_counter = true;
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
|
||||
done:
|
||||
@@ -1238,25 +1233,26 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
bool *clear_counter)
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t index;
|
||||
|
||||
*clear_counter = false;
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
bool clear_counter_local = false;
|
||||
unsigned out_flags_local = 0;
|
||||
status = service_phys_single_va_block(gpu,
|
||||
batch_context,
|
||||
current_entry,
|
||||
reverse_mappings + index,
|
||||
1,
|
||||
&clear_counter_local);
|
||||
&out_flags_local);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
*clear_counter = *clear_counter || clear_counter_local;
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
|
||||
*out_flags |= out_flags_local;
|
||||
}
|
||||
|
||||
// In the case of failure, drop the refcounts for the remaining reverse mappings
|
||||
@@ -1267,18 +1263,13 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
// Iterate over all regions set in the given sub_granularity mask
|
||||
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) \
|
||||
for ((region_start) = find_first_bit(&(sub_granularity), (config)->sub_granularity_regions_per_translation), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), \
|
||||
(config)->sub_granularity_regions_per_translation, \
|
||||
(region_start) + 1); \
|
||||
(region_start) < config->sub_granularity_regions_per_translation; \
|
||||
(region_start) = find_next_bit(&(sub_granularity), \
|
||||
(config)->sub_granularity_regions_per_translation, \
|
||||
(region_end) + 1), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), \
|
||||
(config)->sub_granularity_regions_per_translation, \
|
||||
(region_start) + 1))
|
||||
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
|
||||
for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1); \
|
||||
(region_start) < (num_regions); \
|
||||
(region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
|
||||
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))
|
||||
|
||||
|
||||
static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
|
||||
{
|
||||
@@ -1309,7 +1300,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
NvU64 address,
|
||||
unsigned long sub_granularity,
|
||||
size_t *num_reverse_mappings,
|
||||
bool *clear_counter)
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 region_start, region_end;
|
||||
@@ -1318,7 +1309,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
// Get the reverse_map translations for all the regions set in the
|
||||
// sub_granularity field of the counter.
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) {
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
|
||||
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
|
||||
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
|
||||
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
|
||||
@@ -1350,7 +1341,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
clear_counter);
|
||||
out_flags);
|
||||
}
|
||||
else {
|
||||
status = service_phys_va_blocks(gpu,
|
||||
@@ -1358,7 +1349,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
clear_counter);
|
||||
out_flags);
|
||||
}
|
||||
|
||||
return status;
|
||||
@@ -1366,7 +1357,8 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 translation_index;
|
||||
@@ -1377,7 +1369,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
size_t total_reverse_mappings = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
bool clear_counter = false;
|
||||
unsigned flags = 0;
|
||||
|
||||
address = current_entry->address.address;
|
||||
UVM_ASSERT(address % config->translation_size == 0);
|
||||
@@ -1405,7 +1397,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
|
||||
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
|
||||
size_t num_reverse_mappings;
|
||||
bool clear_counter_local = false;
|
||||
unsigned out_flags_local = 0;
|
||||
status = service_phys_notification_translation(gpu,
|
||||
resident_gpu,
|
||||
batch_context,
|
||||
@@ -1414,9 +1406,11 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
address,
|
||||
sub_granularity,
|
||||
&num_reverse_mappings,
|
||||
&clear_counter_local);
|
||||
&out_flags_local);
|
||||
total_reverse_mappings += num_reverse_mappings;
|
||||
clear_counter = clear_counter || clear_counter_local;
|
||||
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
|
||||
flags |= out_flags_local;
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
@@ -1425,17 +1419,14 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
|
||||
}
|
||||
|
||||
// TODO: Bug 1990466: Here we already have virtual addresses and
|
||||
// address spaces. Merge virtual and physical notification handling
|
||||
|
||||
// Currently we only report events for our tests, not for tools
|
||||
if (uvm_enable_builtin_tests) {
|
||||
const bool on_managed = total_reverse_mappings != 0;
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, on_managed);
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
|
||||
}
|
||||
|
||||
if (status == NV_OK && clear_counter)
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -1450,11 +1441,18 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
|
||||
unsigned flags = 0;
|
||||
|
||||
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
|
||||
continue;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, current_entry);
|
||||
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
@@ -1462,6 +1460,191 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
|
||||
{
|
||||
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
|
||||
*(uvm_gpu_phys_address_t*)_b);
|
||||
}
|
||||
|
||||
static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
|
||||
{
|
||||
if (a.aperture != b.aperture)
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(is_power_of_2(granularity));
|
||||
|
||||
return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
|
||||
}
|
||||
|
||||
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
|
||||
NvU64 region_size,
|
||||
NvU64 sub_region_size,
|
||||
NvU32 accessed_mask)
|
||||
{
|
||||
const unsigned accessed_index = (address.address % region_size) / sub_region_size;
|
||||
|
||||
// accessed_mask is only filled for tracking granularities larger than 64K
|
||||
if (region_size == UVM_PAGE_SIZE_64K)
|
||||
return true;
|
||||
|
||||
UVM_ASSERT(accessed_index < 32);
|
||||
return ((1 << accessed_index) & accessed_mask) != 0;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU64 notification_size;
|
||||
NvU64 address;
|
||||
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
|
||||
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
|
||||
int num_addresses = 0;
|
||||
int i;
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
NvU64 region_start = current_entry->address.address;
|
||||
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
|
||||
|
||||
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_type_t counter_type = current_entry->counter_type;
|
||||
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
|
||||
|
||||
uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
|
||||
|
||||
UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
// Entries with NULL va_space are simply dropped.
|
||||
if (!va_space)
|
||||
return NV_OK;
|
||||
|
||||
status = config_granularity_to_bytes(config->rm.granularity, ¬ification_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Collect physical locations that could have been touched
|
||||
// in the reported 64K VA region. The notification mask can
|
||||
// correspond to any of them.
|
||||
uvm_va_space_down_read(va_space);
|
||||
for (address = region_start; address < region_end;) {
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
|
||||
address += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
while (address < va_block->end && address < region_end) {
|
||||
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
|
||||
|
||||
// UVM va_block always maps the closest resident location to processor
|
||||
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
|
||||
// Add physical location if it's valid and not local vidmem
|
||||
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
|
||||
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
|
||||
if (phys_address_in_accessed_sub_region(phys_address,
|
||||
notification_size,
|
||||
config->sub_granularity_region_size,
|
||||
current_entry->sub_granularity)) {
|
||||
resident_processors[num_addresses] = res_id;
|
||||
phys_addresses[num_addresses] = phys_address;
|
||||
++num_addresses;
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
|
||||
phys_address.address,
|
||||
uvm_aperture_string(phys_address.aperture),
|
||||
current_entry->sub_granularity);
|
||||
}
|
||||
}
|
||||
|
||||
address += PAGE_SIZE;
|
||||
}
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
}
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// The addresses need to be sorted to aid coalescing.
|
||||
sort(phys_addresses,
|
||||
num_addresses,
|
||||
sizeof(*phys_addresses),
|
||||
cmp_sort_gpu_phys_addr,
|
||||
NULL);
|
||||
|
||||
for (i = 0; i < num_addresses; ++i) {
|
||||
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
|
||||
|
||||
// Skip the current pointer if the physical region was already handled
|
||||
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
|
||||
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
|
||||
continue;
|
||||
}
|
||||
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
|
||||
i,
|
||||
num_addresses,
|
||||
phys_addresses[i].address,
|
||||
notification_size - 1,
|
||||
uvm_aperture_string(phys_addresses[i].aperture),
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
// Construct a fake phys addr AC entry
|
||||
fake_entry->counter_type = current_entry->counter_type;
|
||||
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
|
||||
fake_entry->address.aperture = phys_addresses[i].aperture;
|
||||
fake_entry->address.is_virtual = false;
|
||||
fake_entry->physical_info.resident_id = resident_processors[i];
|
||||
fake_entry->counter_value = current_entry->counter_value;
|
||||
fake_entry->sub_granularity = current_entry->sub_granularity;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
NV_STATUS status = NV_OK;
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
unsigned flags = 0;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
|
||||
|
||||
status = service_virt_notification(gpu, batch_context, current_entry, &flags);
|
||||
|
||||
UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
|
||||
i + 1,
|
||||
batch_context->virt.num_notifications,
|
||||
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
|
||||
status,
|
||||
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -1535,7 +1718,7 @@ bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
|
||||
return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_perf_access_counters_init()
|
||||
NV_STATUS uvm_perf_access_counters_init(void)
|
||||
{
|
||||
uvm_perf_module_init("perf_access_counters",
|
||||
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
|
||||
@@ -1546,7 +1729,7 @@ NV_STATUS uvm_perf_access_counters_init()
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_perf_access_counters_exit()
|
||||
void uvm_perf_access_counters_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -338,7 +338,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_processor_id_t new_residency;
|
||||
bool read_duplicate;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
@@ -365,8 +364,11 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
// Check logical permissions
|
||||
status = uvm_va_range_check_logical_permissions(va_range,
|
||||
status = uvm_va_block_check_logical_permissions(va_block,
|
||||
&service_context->block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
fault_entry->fault_access_type,
|
||||
uvm_range_group_address_migratable(va_space,
|
||||
fault_entry->fault_address));
|
||||
@@ -386,6 +388,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
gpu->id,
|
||||
fault_entry->access_type_mask,
|
||||
@@ -422,7 +425,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
@@ -432,7 +434,6 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context.mm = mm;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@@ -458,8 +459,6 @@ static void kill_channel_delayed(void *_user_channel)
|
||||
uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
|
||||
uvm_va_space_t *va_space = user_channel->kill_channel.va_space;
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
if (user_channel->gpu_va_space) {
|
||||
// RM handles the fault, which will do the correct fault reporting in the
|
||||
@@ -598,6 +597,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
// to remain valid until we release. If no mm is registered, we
|
||||
// can only service managed faults, not ATS/HMM faults.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
@@ -622,12 +622,11 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
|
||||
if (!fault_entry->is_fatal) {
|
||||
status = uvm_va_block_find_create(fault_entry->va_space,
|
||||
mm,
|
||||
fault_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK)
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
|
||||
else
|
||||
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -1034,6 +1034,61 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool check_fault_entry_duplicate(const uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry)
|
||||
{
|
||||
bool is_duplicate = false;
|
||||
|
||||
if (previous_entry) {
|
||||
is_duplicate = (current_entry->va_space == previous_entry->va_space) &&
|
||||
(current_entry->fault_address == previous_entry->fault_address);
|
||||
}
|
||||
|
||||
return is_duplicate;
|
||||
}
|
||||
|
||||
static void fault_entry_duplicate_flags(uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry)
|
||||
{
|
||||
UVM_ASSERT(previous_entry);
|
||||
UVM_ASSERT(check_fault_entry_duplicate(current_entry, previous_entry));
|
||||
|
||||
// Propagate the is_invalid_prefetch flag across all prefetch faults
|
||||
// on the page
|
||||
if (previous_entry->is_invalid_prefetch)
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
|
||||
// If a page is throttled, all faults on the page must be skipped
|
||||
if (previous_entry->is_throttled)
|
||||
current_entry->is_throttled = true;
|
||||
}
|
||||
|
||||
static void update_batch_context(uvm_fault_service_batch_context_t *batch_context,
|
||||
uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry)
|
||||
{
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
|
||||
|
||||
UVM_ASSERT(utlb->num_pending_faults > 0);
|
||||
|
||||
if (is_duplicate)
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances;
|
||||
else
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances - 1;
|
||||
|
||||
if (current_entry->is_invalid_prefetch)
|
||||
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
}
|
||||
|
||||
if (current_entry->is_throttled)
|
||||
batch_context->has_throttled_faults = true;
|
||||
}
|
||||
|
||||
// This function computes the maximum access type that can be serviced for the
|
||||
// reported fault instances given the logical permissions of the VA range. If
|
||||
// none of the fault instances can be serviced UVM_FAULT_ACCESS_TYPE_COUNT is
|
||||
@@ -1055,13 +1110,17 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
|
||||
// - service_access_type: highest access type that can be serviced.
|
||||
static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
bool allow_migration)
|
||||
{
|
||||
NV_STATUS perm_status;
|
||||
|
||||
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
perm_status = uvm_va_block_check_logical_permissions(va_block,
|
||||
va_block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
fault_entry->fault_access_type,
|
||||
allow_migration);
|
||||
if (perm_status == NV_OK)
|
||||
@@ -1083,8 +1142,11 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
// service them before we can cancel the write/atomic faults. So we
|
||||
// retry with read fault access type.
|
||||
if (uvm_fault_access_type_mask_test(fault_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
perm_status = uvm_va_block_check_logical_permissions(va_block,
|
||||
va_block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
allow_migration);
|
||||
if (perm_status == NV_OK)
|
||||
@@ -1115,12 +1177,12 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
// - NV_ERR_MORE_PROCESSING_REQUIRED if servicing needs allocation retry
|
||||
// - NV_ERR_NO_MEMORY if the faults could not be serviced due to OOM
|
||||
// - Any other value is a UVM-global error
|
||||
static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
NvU32 first_fault_index,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 *block_faults)
|
||||
static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 i;
|
||||
@@ -1156,14 +1218,16 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address >= va_block->start);
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
|
||||
|
||||
end = va_block->end;
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_find_policy_end(va_block,
|
||||
&block_context->block_context,
|
||||
ordered_fault_cache[first_fault_index]->fault_address,
|
||||
&end);
|
||||
else
|
||||
}
|
||||
else {
|
||||
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
end = va_block->end;
|
||||
}
|
||||
|
||||
// Scan the sorted array and notify the fault event for all fault entries
|
||||
// in the block
|
||||
@@ -1191,7 +1255,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
if (i > first_fault_index) {
|
||||
previous_entry = ordered_fault_cache[i - 1];
|
||||
is_duplicate = current_entry->fault_address == previous_entry->fault_address;
|
||||
is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
}
|
||||
|
||||
if (block_context->num_retries == 0) {
|
||||
@@ -1206,12 +1270,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Service the most intrusive fault per page, only. Waive the rest
|
||||
if (is_duplicate) {
|
||||
// Propagate the is_invalid_prefetch flag across all prefetch
|
||||
// faults on the page
|
||||
current_entry->is_invalid_prefetch = previous_entry->is_invalid_prefetch;
|
||||
|
||||
// If a page is throttled, all faults on the page must be skipped
|
||||
current_entry->is_throttled = previous_entry->is_throttled;
|
||||
fault_entry_duplicate_flags(current_entry, previous_entry);
|
||||
|
||||
// The previous fault was non-fatal so the page has been already
|
||||
// serviced
|
||||
@@ -1226,7 +1285,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
UVM_ASSERT(iter.start <= current_entry->fault_address && iter.end >= current_entry->fault_address);
|
||||
|
||||
service_access_type = check_fault_access_permissions(gpu, va_block, current_entry, iter.migratable);
|
||||
service_access_type = check_fault_access_permissions(gpu,
|
||||
va_block,
|
||||
&block_context->block_context,
|
||||
current_entry,
|
||||
iter.migratable);
|
||||
|
||||
// Do not exit early due to logical errors such as access permission
|
||||
// violation.
|
||||
@@ -1269,6 +1332,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&block_context->block_context,
|
||||
page_index,
|
||||
gpu->id,
|
||||
service_access_type_mask,
|
||||
@@ -1302,25 +1366,8 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
// Only update counters the first time since logical permissions cannot
|
||||
// change while we hold the VA space lock
|
||||
// TODO: Bug 1750144: That might not be true with HMM.
|
||||
if (block_context->num_retries == 0) {
|
||||
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
|
||||
|
||||
if (current_entry->is_invalid_prefetch)
|
||||
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;
|
||||
|
||||
if (is_duplicate)
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances;
|
||||
else
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances - 1;
|
||||
|
||||
if (current_entry->is_throttled)
|
||||
batch_context->has_throttled_faults = true;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
}
|
||||
}
|
||||
if (block_context->num_retries == 0)
|
||||
update_batch_context(batch_context, current_entry, previous_entry);
|
||||
}
|
||||
|
||||
// Apply the changes computed in the fault service block context, if there
|
||||
@@ -1347,12 +1394,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
//
|
||||
// See the comments for function service_fault_batch_block_locked for
|
||||
// implementation details and error codes.
|
||||
static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
NvU32 first_fault_index,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 *block_faults)
|
||||
static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
@@ -1361,17 +1407,16 @@ static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
|
||||
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
|
||||
fault_block_context->num_retries = 0;
|
||||
fault_block_context->block_context.mm = mm;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
|
||||
service_batch_managed_faults_in_block_locked(gpu,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
first_fault_index,
|
||||
batch_context,
|
||||
block_faults));
|
||||
service_fault_batch_block_locked(gpu,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
batch_context,
|
||||
first_fault_index,
|
||||
block_faults));
|
||||
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&batch_context->tracker, &va_block->tracker);
|
||||
|
||||
@@ -1390,94 +1435,128 @@ typedef enum
|
||||
FAULT_SERVICE_MODE_CANCEL,
|
||||
} fault_service_mode_t;
|
||||
|
||||
static NV_STATUS service_non_managed_fault(uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry,
|
||||
NV_STATUS lookup_status,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate,
|
||||
uvm_fault_utlb_info_t *utlb)
|
||||
static NV_STATUS service_fault_batch_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status = lookup_status;
|
||||
bool is_duplicate = false;
|
||||
UVM_ASSERT(utlb->num_pending_faults > 0);
|
||||
UVM_ASSERT(lookup_status != NV_OK);
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
|
||||
const uvm_fault_buffer_entry_t *previous_entry = first_fault_index > 0 ?
|
||||
batch_context->ordered_fault_cache[first_fault_index - 1] : NULL;
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
|
||||
if (previous_entry) {
|
||||
is_duplicate = (current_entry->va_space == previous_entry->va_space) &&
|
||||
(current_entry->fault_address == previous_entry->fault_address);
|
||||
|
||||
if (is_duplicate) {
|
||||
// Propagate the is_invalid_prefetch flag across all prefetch faults
|
||||
// on the page
|
||||
if (previous_entry->is_invalid_prefetch)
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
|
||||
// If a page is throttled, all faults on the page must be skipped
|
||||
if (previous_entry->is_throttled)
|
||||
current_entry->is_throttled = true;
|
||||
}
|
||||
}
|
||||
if (is_duplicate)
|
||||
fault_entry_duplicate_flags(current_entry, previous_entry);
|
||||
|
||||
// Generate fault events for all fault packets
|
||||
uvm_perf_event_notify_gpu_fault(¤t_entry->va_space->perf_events,
|
||||
NULL,
|
||||
gpu_va_space->gpu->id,
|
||||
gpu->id,
|
||||
UVM_ID_INVALID,
|
||||
current_entry,
|
||||
batch_context->batch_id,
|
||||
is_duplicate);
|
||||
|
||||
if (status != NV_ERR_INVALID_ADDRESS)
|
||||
return status;
|
||||
// The VA isn't managed. See if ATS knows about it, unless it is a
|
||||
// duplicate and the previous fault was non-fatal so the page has
|
||||
// already been serviced
|
||||
//
|
||||
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
|
||||
// don't do an unconditional VA range lookup for every ATS fault.
|
||||
if (!is_duplicate || previous_entry->is_fatal)
|
||||
status = uvm_ats_service_fault_entry(gpu_va_space, current_entry, ats_invalidate);
|
||||
else
|
||||
status = NV_OK;
|
||||
|
||||
if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
// The VA isn't managed. See if ATS knows about it, unless it is a
|
||||
// duplicate and the previous fault was non-fatal so the page has
|
||||
// already been serviced
|
||||
if (!is_duplicate || previous_entry->is_fatal)
|
||||
status = uvm_ats_service_fault_entry(gpu_va_space, current_entry, ats_invalidate);
|
||||
else
|
||||
status = NV_OK;
|
||||
(*block_faults)++;
|
||||
|
||||
update_batch_context(batch_context, current_entry, previous_entry);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void service_fault_batch_fatal(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NV_STATUS status,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
|
||||
const uvm_fault_buffer_entry_t *previous_entry = first_fault_index > 0 ?
|
||||
batch_context->ordered_fault_cache[first_fault_index - 1] : NULL;
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
|
||||
if (is_duplicate)
|
||||
fault_entry_duplicate_flags(current_entry, previous_entry);
|
||||
|
||||
// The VA block cannot be found, set the fatal fault flag,
|
||||
// unless it is a prefetch fault
|
||||
if (current_entry->fault_access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) {
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
}
|
||||
else {
|
||||
// If the VA block cannot be found, set the fatal fault flag,
|
||||
// unless it is a prefetch fault
|
||||
if (current_entry->fault_access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) {
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
}
|
||||
else {
|
||||
current_entry->is_fatal = true;
|
||||
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
current_entry->is_fatal = true;
|
||||
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
|
||||
update_batch_context(batch_context, current_entry, previous_entry);
|
||||
|
||||
uvm_perf_event_notify_gpu_fault(¤t_entry->va_space->perf_events,
|
||||
NULL,
|
||||
gpu->id,
|
||||
UVM_ID_INVALID,
|
||||
current_entry,
|
||||
batch_context->batch_id,
|
||||
is_duplicate);
|
||||
|
||||
(*block_faults)++;
|
||||
}
|
||||
|
||||
static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
|
||||
struct mm_struct *mm = va_block_context->mm;
|
||||
NvU64 fault_address = current_entry->fault_address;
|
||||
|
||||
(*block_faults) = 0;
|
||||
|
||||
va_range = uvm_va_range_find(va_space, fault_address);
|
||||
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, va_block_context, &va_block);
|
||||
if (status == NV_OK) {
|
||||
status = service_fault_batch_block(gpu, va_block, batch_context, first_fault_index, block_faults);
|
||||
}
|
||||
else if ((status == NV_ERR_INVALID_ADDRESS) && uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
status = service_fault_batch_ats(gpu_va_space, mm, batch_context, first_fault_index, block_faults);
|
||||
}
|
||||
else {
|
||||
service_fault_batch_fatal(gpu_va_space->gpu, batch_context, first_fault_index, status, block_faults);
|
||||
|
||||
// Do not fail due to logical errors
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
if (is_duplicate)
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances;
|
||||
else
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances - 1;
|
||||
|
||||
if (current_entry->is_invalid_prefetch)
|
||||
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
}
|
||||
|
||||
if (current_entry->is_throttled)
|
||||
batch_context->has_throttled_faults = true;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Scan the ordered view of faults and group them by different va_blocks.
|
||||
// Service faults for each va_block, in batch.
|
||||
// Scan the ordered view of faults and group them by different va_blocks
|
||||
// (managed faults) and service faults for each va_block, in batch.
|
||||
// Service non-managed faults one at a time as they are encountered during the
|
||||
// scan.
|
||||
//
|
||||
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
|
||||
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
|
||||
@@ -1491,9 +1570,9 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
struct mm_struct *mm = NULL;
|
||||
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
|
||||
gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
|
||||
struct mm_struct *mm = NULL;
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
|
||||
@@ -1502,7 +1581,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults;) {
|
||||
uvm_va_block_t *va_block;
|
||||
NvU32 block_faults;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
|
||||
@@ -1531,18 +1609,16 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
// to remain valid until we release. If no mm is registered, we
|
||||
// can only service managed faults, not ATS/HMM faults.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
if (gpu_va_space && gpu_va_space->needs_fault_buffer_flush) {
|
||||
// flush if required and clear the flush flag
|
||||
if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
|
||||
status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
batch_context);
|
||||
gpu_va_space->needs_fault_buffer_flush = false;
|
||||
|
||||
if (status == NV_OK)
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
|
||||
@@ -1573,51 +1649,22 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
|
||||
// don't do an unconditional VA range lookup for every ATS fault.
|
||||
status = uvm_va_block_find_create(va_space,
|
||||
mm,
|
||||
current_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK) {
|
||||
status = service_batch_managed_faults_in_block(gpu_va_space->gpu,
|
||||
mm,
|
||||
va_block,
|
||||
i,
|
||||
batch_context,
|
||||
&block_faults);
|
||||
|
||||
// When service_batch_managed_faults_in_block returns != NV_OK
|
||||
// something really bad happened
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
i += block_faults;
|
||||
}
|
||||
else {
|
||||
const uvm_fault_buffer_entry_t *previous_entry = i == 0? NULL : batch_context->ordered_fault_cache[i - 1];
|
||||
|
||||
status = service_non_managed_fault(current_entry,
|
||||
previous_entry,
|
||||
status,
|
||||
gpu_va_space,
|
||||
mm,
|
||||
batch_context,
|
||||
ats_invalidate,
|
||||
utlb);
|
||||
|
||||
// When service_non_managed_fault returns != NV_OK something really
|
||||
// bad happened
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
++i;
|
||||
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults);
|
||||
// TODO: Bug 3900733: clean up locking in service_fault_batch().
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
mm = NULL;
|
||||
va_space = NULL;
|
||||
continue;
|
||||
}
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
i += block_faults;
|
||||
|
||||
// Don't issue replays in cancel mode
|
||||
if (replay_per_va_block) {
|
||||
if (replay_per_va_block && !batch_context->has_fatal_faults) {
|
||||
status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -118,12 +118,20 @@ static bool is_canary(NvU32 val)
|
||||
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
|
||||
}
|
||||
|
||||
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
|
||||
// pool?
|
||||
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
|
||||
{
|
||||
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
|
||||
}
|
||||
|
||||
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_semaphore_pool_page_t *pool_page;
|
||||
NvU32 *payloads;
|
||||
size_t i;
|
||||
uvm_rm_mem_type_t rm_mem_type = UVM_RM_MEM_TYPE_SYS;
|
||||
|
||||
uvm_assert_mutex_locked(&pool->mutex);
|
||||
|
||||
@@ -134,10 +142,17 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
|
||||
pool_page->pool = pool;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, UVM_RM_MEM_TYPE_SYS, UVM_SEMAPHORE_PAGE_SIZE, &pool_page->memory);
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
|
||||
rm_mem_type,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
0,
|
||||
&pool_page->memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
// Verify the GPU can access the semaphore pool.
|
||||
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
|
||||
|
||||
// All semaphores are initially free
|
||||
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
|
||||
@@ -321,7 +336,7 @@ NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
|
||||
list_for_each_entry(page, &pool->pages, all_pages_node) {
|
||||
status = uvm_rm_mem_map_gpu(page->memory, gpu);
|
||||
status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -41,10 +41,12 @@
|
||||
#include "clc6b5.h"
|
||||
#include "clc56f.h"
|
||||
#include "clc7b5.h"
|
||||
#include "clc86f.h"
|
||||
#include "clc8b5.h"
|
||||
|
||||
|
||||
|
||||
|
||||
static int uvm_downgrade_force_membar_sys = 1;
|
||||
module_param(uvm_downgrade_force_membar_sys, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_downgrade_force_membar_sys, "Force all TLB invalidation downgrades to use MEMBAR_SYS");
|
||||
|
||||
#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
|
||||
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
|
||||
@@ -52,9 +54,6 @@
|
||||
#define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
|
||||
#define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
|
||||
|
||||
|
||||
|
||||
|
||||
// Table for copy engine functions.
|
||||
// Each entry is associated with a copy engine class through the 'class' field.
|
||||
// By setting the 'parent_class' field, a class will inherit the parent class's
|
||||
@@ -66,7 +65,7 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.id = MAXWELL_DMA_COPY_A,
|
||||
.u.ce_ops = {
|
||||
.init = uvm_hal_maxwell_ce_init,
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
|
||||
@@ -74,11 +73,11 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
|
||||
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
|
||||
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
|
||||
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memcopy = uvm_hal_maxwell_ce_memcopy,
|
||||
.memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
|
||||
.memset_validate = uvm_hal_ce_memset_validate_stub,
|
||||
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
|
||||
.memset_1 = uvm_hal_maxwell_ce_memset_1,
|
||||
.memset_4 = uvm_hal_maxwell_ce_memset_4,
|
||||
.memset_8 = uvm_hal_maxwell_ce_memset_8,
|
||||
@@ -104,7 +103,15 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
{
|
||||
.id = VOLTA_DMA_COPY_A,
|
||||
.parent_id = PASCAL_DMA_COPY_B,
|
||||
.u.ce_ops = {},
|
||||
.u.ce_ops = {
|
||||
.semaphore_release = uvm_hal_volta_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_volta_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_volta_ce_semaphore_reduction_inc,
|
||||
.memcopy = uvm_hal_volta_ce_memcopy,
|
||||
.memset_1 = uvm_hal_volta_ce_memset_1,
|
||||
.memset_4 = uvm_hal_volta_ce_memset_4,
|
||||
.memset_8 = uvm_hal_volta_ce_memset_8,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = TURING_DMA_COPY_A,
|
||||
@@ -115,40 +122,40 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.id = AMPERE_DMA_COPY_A,
|
||||
.parent_id = TURING_DMA_COPY_A,
|
||||
.u.ce_ops = {
|
||||
.method_validate = uvm_hal_ampere_ce_method_validate_c6b5,
|
||||
.method_is_valid = uvm_hal_ampere_ce_method_is_valid_c6b5,
|
||||
.phys_mode = uvm_hal_ampere_ce_phys_mode,
|
||||
.memcopy_validate = uvm_hal_ampere_ce_memcopy_validate_c6b5,
|
||||
.memcopy_is_valid = uvm_hal_ampere_ce_memcopy_is_valid_c6b5,
|
||||
.memcopy_patch_src = uvm_hal_ampere_ce_memcopy_patch_src_c6b5,
|
||||
.memset_validate = uvm_hal_ampere_ce_memset_validate_c6b5,
|
||||
.memset_is_valid = uvm_hal_ampere_ce_memset_is_valid_c6b5,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = AMPERE_DMA_COPY_B,
|
||||
.parent_id = AMPERE_DMA_COPY_A,
|
||||
.u.ce_ops = {
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
|
||||
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memset_validate = uvm_hal_ce_memset_validate_stub,
|
||||
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = HOPPER_DMA_COPY_A,
|
||||
.parent_id = AMPERE_DMA_COPY_B,
|
||||
.u.ce_ops = {
|
||||
.semaphore_release = uvm_hal_hopper_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_hopper_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
|
||||
.offset_out = uvm_hal_hopper_ce_offset_out,
|
||||
.offset_in_out = uvm_hal_hopper_ce_offset_in_out,
|
||||
.memset_1 = uvm_hal_hopper_ce_memset_1,
|
||||
.memset_4 = uvm_hal_hopper_ce_memset_4,
|
||||
.memset_8 = uvm_hal_hopper_ce_memset_8,
|
||||
.memcopy_is_valid = uvm_hal_hopper_ce_memcopy_is_valid,
|
||||
.memset_is_valid = uvm_hal_hopper_ce_memset_is_valid,
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
// Table for GPFIFO functions. Same idea as the copy engine table.
|
||||
@@ -159,8 +166,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.id = KEPLER_CHANNEL_GPFIFO_B,
|
||||
.u.host_ops = {
|
||||
.init = uvm_hal_maxwell_host_init_noop,
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.sw_method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
|
||||
.membar_sys = uvm_hal_maxwell_host_membar_sys,
|
||||
// No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
|
||||
@@ -171,6 +178,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.semaphore_release = uvm_hal_maxwell_host_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
|
||||
.set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
|
||||
.set_gpfifo_noop = uvm_hal_maxwell_host_set_gpfifo_noop,
|
||||
.set_gpfifo_pushbuffer_segment_base = uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported,
|
||||
.write_gpu_put = uvm_hal_maxwell_host_write_gpu_put,
|
||||
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f,
|
||||
.tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va,
|
||||
@@ -240,8 +249,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.id = AMPERE_CHANNEL_GPFIFO_A,
|
||||
.parent_id = TURING_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {
|
||||
.method_validate = uvm_hal_ampere_host_method_validate,
|
||||
.sw_method_validate = uvm_hal_ampere_host_sw_method_validate,
|
||||
.method_is_valid = uvm_hal_ampere_host_method_is_valid,
|
||||
.sw_method_is_valid = uvm_hal_ampere_host_sw_method_is_valid,
|
||||
.clear_faulted_channel_sw_method = uvm_hal_ampere_host_clear_faulted_channel_sw_method,
|
||||
.clear_faulted_channel_register = uvm_hal_ampere_host_clear_faulted_channel_register,
|
||||
.tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
|
||||
@@ -249,23 +258,23 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
.id = HOPPER_CHANNEL_GPFIFO_A,
|
||||
.parent_id = AMPERE_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.semaphore_acquire = uvm_hal_hopper_host_semaphore_acquire,
|
||||
.semaphore_release = uvm_hal_hopper_host_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_hopper_host_semaphore_timestamp,
|
||||
.tlb_invalidate_all = uvm_hal_hopper_host_tlb_invalidate_all,
|
||||
.tlb_invalidate_va = uvm_hal_hopper_host_tlb_invalidate_va,
|
||||
.tlb_invalidate_test = uvm_hal_hopper_host_tlb_invalidate_test,
|
||||
.cancel_faults_va = uvm_hal_hopper_cancel_faults_va,
|
||||
.set_gpfifo_entry = uvm_hal_hopper_host_set_gpfifo_entry,
|
||||
.set_gpfifo_pushbuffer_segment_base = uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t arch_table[] =
|
||||
@@ -326,43 +335,23 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_ada_arch_init_properties,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_hopper_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_hopper,
|
||||
.mmu_engine_id_to_type = uvm_hal_hopper_mmu_engine_id_to_type,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_hopper_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
@@ -430,33 +419,18 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.u.fault_buffer_ops = {}
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.fault_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.fault_buffer_ops = {
|
||||
.get_ve_id = uvm_hal_hopper_fault_buffer_get_ve_id,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
@@ -509,105 +483,18 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
|
||||
{
|
||||
NvLength i;
|
||||
@@ -711,17 +598,6 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -772,27 +648,23 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// Access counters are currently not supported in vGPU.
|
||||
//
|
||||
// TODO: Bug 200692962: Add support for access counters in vGPU
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
|
||||
parent_gpu->access_counters_supported = false;
|
||||
}
|
||||
|
||||
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
parent_gpu->arch_hal->init_properties(parent_gpu);
|
||||
|
||||
// Override the HAL when in non-passthrough virtualization
|
||||
// TODO: Bug 200692962: [UVM] Add support for access counters in UVM on SR-IOV configurations
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
|
||||
parent_gpu->access_counters_supported = false;
|
||||
hal_override_properties(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
@@ -811,6 +683,44 @@ void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
uvm_hal_membar(gpu, push, membar);
|
||||
}
|
||||
|
||||
bool uvm_hal_membar_before_semaphore(uvm_push_t *push)
|
||||
{
|
||||
uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);
|
||||
|
||||
if (membar == UVM_MEMBAR_NONE) {
|
||||
// No MEMBAR requested, don't use a flush.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_GPU) {
|
||||
// MEMBAR GPU requested, do it on the HOST and skip the engine flush as
|
||||
// it doesn't have this capability.
|
||||
uvm_hal_wfi_membar(push, UVM_MEMBAR_GPU);
|
||||
return false;
|
||||
}
|
||||
|
||||
// By default do a MEMBAR SYS and for that we can just use flush on the
|
||||
// semaphore operation.
|
||||
return true;
|
||||
}
|
||||
|
||||
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
|
||||
{
|
||||
// If the mapped memory was local, and we're not using a coherence protocol,
|
||||
// we only need a GPU-local membar. This is because all accesses to this
|
||||
// memory, including those from other processors like the CPU or peer GPUs,
|
||||
// must come through this GPU's L2. In all current architectures, MEMBAR_GPU
|
||||
// is sufficient to resolve ordering at the L2 level.
|
||||
if (is_local_vidmem && !gpu->parent->numa_info.enabled && !uvm_downgrade_force_membar_sys)
|
||||
return UVM_MEMBAR_GPU;
|
||||
|
||||
// If the mapped memory was remote, or if a coherence protocol can cache
|
||||
// this GPU's memory, then there are external ways for other processors to
|
||||
// access the memory without always going the local GPU L2, so we must use a
|
||||
// MEMBAR_SYS.
|
||||
return UVM_MEMBAR_SYS;
|
||||
}
|
||||
|
||||
const char *uvm_aperture_string(uvm_aperture_t aperture)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
|
||||
@@ -971,12 +881,12 @@ void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_e
|
||||
UVM_DBG_PRINT(" tag %x\n", entry->tag);
|
||||
}
|
||||
|
||||
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -985,7 +895,7 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -34,24 +34,20 @@
|
||||
|
||||
// A dummy method validation that always returns true; it can be used to skip
|
||||
// CE/Host/SW method validations for a given architecture
|
||||
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
typedef void (*uvm_hal_init_t)(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
|
||||
void uvm_hal_pascal_host_init(uvm_push_t *push);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Host method validation
|
||||
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
typedef bool (*uvm_hal_host_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// SW method validation
|
||||
typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
typedef bool (*uvm_hal_host_sw_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// Wait for idle
|
||||
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
|
||||
@@ -118,12 +114,10 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
|
||||
// Issue a TLB invalidate applying to the specified VA range in a PDB.
|
||||
//
|
||||
@@ -183,15 +177,13 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar);
|
||||
|
||||
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
@@ -205,11 +197,9 @@ void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
|
||||
// By default all semaphore release operations include a membar sys before the
|
||||
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
|
||||
@@ -217,18 +207,11 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_volta_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
// Release a semaphore including a timestamp at the specific GPU VA.
|
||||
//
|
||||
@@ -238,31 +221,29 @@ void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
|
||||
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_volta_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
|
||||
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);
|
||||
|
||||
typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
|
||||
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
|
||||
@@ -277,16 +258,12 @@ NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
|
||||
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
|
||||
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
|
||||
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);
|
||||
|
||||
typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
|
||||
|
||||
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
@@ -297,16 +274,17 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void);
|
||||
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
|
||||
|
||||
// CE method validation
|
||||
typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// Memcopy validation.
|
||||
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
|
||||
// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
|
||||
// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
|
||||
// Patching of the memcopy source; if not needed for a given architecture use
|
||||
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
|
||||
@@ -321,6 +299,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
|
||||
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
|
||||
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
|
||||
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
|
||||
void uvm_hal_volta_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
|
||||
|
||||
// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
|
||||
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
|
||||
@@ -328,11 +307,12 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s
|
||||
|
||||
// Memset validation.
|
||||
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
|
||||
// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
|
||||
// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
|
||||
// Memset size bytes at dst to a given N-byte input value.
|
||||
//
|
||||
@@ -354,11 +334,13 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
|
||||
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
|
||||
|
||||
void uvm_hal_volta_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
|
||||
void uvm_hal_volta_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
|
||||
void uvm_hal_volta_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
|
||||
// Increments the semaphore by 1, or resets to 0 if the incremented value would
|
||||
// exceed the payload.
|
||||
@@ -369,9 +351,8 @@ void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value,
|
||||
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
|
||||
|
||||
void uvm_hal_volta_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
// Initialize GPU architecture dependent properties
|
||||
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -380,12 +361,8 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Retrieve the page-tree HAL for a given big page size
|
||||
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
|
||||
@@ -396,9 +373,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
|
||||
|
||||
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
|
||||
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -412,18 +387,14 @@ uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mm
|
||||
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
|
||||
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
|
||||
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
|
||||
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
|
||||
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
|
||||
|
||||
|
||||
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
|
||||
// Replayable faults
|
||||
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -477,9 +448,7 @@ void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
|
||||
|
||||
|
||||
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
|
||||
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
@@ -529,12 +498,10 @@ void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode);
|
||||
|
||||
typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
|
||||
uvm_user_channel_t *user_channel,
|
||||
@@ -619,44 +586,11 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
struct uvm_host_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
uvm_hal_host_method_validate method_validate;
|
||||
uvm_hal_host_sw_method_validate sw_method_validate;
|
||||
uvm_hal_host_method_is_valid method_is_valid;
|
||||
uvm_hal_host_sw_method_is_valid sw_method_is_valid;
|
||||
uvm_hal_wait_for_idle_t wait_for_idle;
|
||||
uvm_hal_membar_sys_t membar_sys;
|
||||
uvm_hal_membar_gpu_t membar_gpu;
|
||||
@@ -666,6 +600,8 @@ struct uvm_host_hal_struct
|
||||
uvm_hal_semaphore_acquire_t semaphore_acquire;
|
||||
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
|
||||
uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
|
||||
uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
|
||||
uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
|
||||
uvm_hal_host_write_gpu_put_t write_gpu_put;
|
||||
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
|
||||
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
|
||||
@@ -686,18 +622,18 @@ struct uvm_host_hal_struct
|
||||
struct uvm_ce_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
uvm_hal_ce_method_validate method_validate;
|
||||
uvm_hal_ce_method_is_valid method_is_valid;
|
||||
uvm_hal_semaphore_release_t semaphore_release;
|
||||
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
|
||||
uvm_hal_ce_offset_out_t offset_out;
|
||||
uvm_hal_ce_offset_in_out_t offset_in_out;
|
||||
uvm_hal_ce_phys_mode_t phys_mode;
|
||||
uvm_hal_ce_plc_mode_t plc_mode;
|
||||
uvm_hal_ce_memcopy_validate memcopy_validate;
|
||||
uvm_hal_ce_memcopy_is_valid memcopy_is_valid;
|
||||
uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
|
||||
uvm_hal_memcopy_t memcopy;
|
||||
uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
|
||||
uvm_hal_ce_memset_validate memset_validate;
|
||||
uvm_hal_ce_memset_is_valid memset_is_valid;
|
||||
uvm_hal_memset_1_t memset_1;
|
||||
uvm_hal_memset_4_t memset_4;
|
||||
uvm_hal_memset_8_t memset_8;
|
||||
@@ -742,17 +678,6 @@ struct uvm_access_counter_buffer_hal_struct
|
||||
uvm_hal_access_counter_buffer_entry_size_t entry_size;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// id is either a hardware class or GPU architecture
|
||||
@@ -775,10 +700,6 @@ typedef struct
|
||||
// access_counter_buffer_ops: id is an architecture
|
||||
uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} u;
|
||||
} uvm_hal_class_ops_t;
|
||||
|
||||
@@ -815,4 +736,20 @@ static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
// appropriate Host membar(s) after a TLB invalidate.
|
||||
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);
|
||||
|
||||
// Internal helper used by architectures/engines that don't support a FLUSH
|
||||
// operation with a FLUSH_TYPE on the semaphore release method, e.g., pre-Volta
|
||||
// CE. It inspects and clears the MEMBAR push flags, issues a Host WFI +
|
||||
// membar.gpu for MEMBAR_GPU or returns true to indicate the caller to use the
|
||||
// engine's FLUSH for MEMBAR_SYS.
|
||||
bool uvm_hal_membar_before_semaphore(uvm_push_t *push);
|
||||
|
||||
// Determine the appropriate membar to use on TLB invalidates for GPU PTE
|
||||
// permissions downgrades.
|
||||
//
|
||||
// gpu is the GPU on which the TLB invalidate is happening.
|
||||
//
|
||||
// is_local_vidmem indicates whether all mappings being invalidated pointed to
|
||||
// the local GPU's memory.
|
||||
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem);
|
||||
|
||||
#endif // __UVM_HAL_H__
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -37,19 +37,10 @@ typedef struct
|
||||
// This stores pointers to uvm_va_block_t for HMM blocks.
|
||||
uvm_range_tree_t blocks;
|
||||
uvm_mutex_t blocks_lock;
|
||||
|
||||
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
|
||||
// This flag is set true by default for each va_space so most processes
|
||||
// don't see partially implemented UVM-HMM behavior but can be enabled by
|
||||
// test code for a given va_space so the test process can do some interim
|
||||
// testing. It needs to be a separate flag instead of modifying
|
||||
// uvm_disable_hmm or va_space->flags since those are user inputs and are
|
||||
// visible/checked by test code.
|
||||
// Remove this when UVM-HMM is fully integrated into chips_a.
|
||||
bool disable;
|
||||
} uvm_hmm_va_space_t;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
// Tells whether HMM is enabled for the given va_space.
|
||||
// If it is not enabled, all of the functions below are no-ops.
|
||||
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space);
|
||||
@@ -62,15 +53,25 @@ typedef struct
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
|
||||
|
||||
// Initialize HMM for the given the va_space for testing.
|
||||
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
|
||||
// the UVM driver. Remove this when enough HMM functionality is implemented.
|
||||
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
|
||||
|
||||
// Destroy any HMM state for the given the va_space.
|
||||
// Locking: va_space lock must be held in write mode.
|
||||
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space);
|
||||
|
||||
// Unmap all page tables in this VA space which map memory owned by this
|
||||
// GPU. Any memory still resident on this GPU will be evicted to system
|
||||
// memory. Note that 'mm' can be NULL (e.g., when closing the UVM file)
|
||||
// in which case any GPU memory is simply freed.
|
||||
// Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
|
||||
// least read mode and the va_space lock must be held in write mode.
|
||||
void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm);
|
||||
|
||||
// Destroy the VA space's mappings on the GPU, if it has any.
|
||||
// Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
|
||||
// least read mode and the va_space lock must be held in write mode.
|
||||
void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm);
|
||||
|
||||
// Find an existing HMM va_block.
|
||||
// This function can be called without having retained and locked the mm,
|
||||
// but in that case, the only allowed operations on the returned block are
|
||||
@@ -83,10 +84,33 @@ typedef struct
|
||||
NvU64 addr,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find an existing HMM va_block when processing a CPU fault and try to
|
||||
// isolate and lock the faulting page.
|
||||
// Return NV_ERR_INVALID_ADDRESS if the block is not found,
|
||||
// NV_ERR_BUSY_RETRY if the page could not be locked, and
|
||||
// NV_OK if the block is found and the page is locked. Also,
|
||||
// uvm_hmm_cpu_fault_finish() must be called if NV_OK is returned.
|
||||
// Locking: This must be called with the vma->vm_mm locked and the va_space
|
||||
// read locked.
|
||||
NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
|
||||
uvm_service_block_context_t *service_context,
|
||||
struct vm_fault *vmf,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// This must be called after uvm_va_block_cpu_fault() if
|
||||
// uvm_hmm_va_block_cpu_find() returns NV_OK.
|
||||
// Locking: This must be called with the vma->vm_mm locked and the va_space
|
||||
// read locked.
|
||||
void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context);
|
||||
|
||||
// Find or create a new HMM va_block.
|
||||
//
|
||||
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
|
||||
// address 'addr' or the VMA does not have at least PROT_READ permission.
|
||||
// The caller is also responsible for checking that there is no UVM
|
||||
// va_range covering the given address before calling this function.
|
||||
// If va_block_context is not NULL, the VMA is cached in
|
||||
// va_block_context->hmm.vma.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
@@ -94,23 +118,53 @@ typedef struct
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find the VMA for the given address and set va_block_context->hmm.vma.
|
||||
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
|
||||
// is no VMA associated with the address 'addr' or the VMA does not have at
|
||||
// least PROT_READ permission.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read or mm equal to NULL.
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
|
||||
|
||||
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
|
||||
// not NULL and covers the given region. This always returns true and is
|
||||
// intended to only be used with UVM_ASSERT().
|
||||
// Locking: This function must be called with the va_block lock held and if
|
||||
// va_block is a HMM block, va_block_context->mm must be retained and
|
||||
// locked for at least read.
|
||||
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Find or create a HMM va_block and mark it so the next va_block split
|
||||
// will fail for testing purposes.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr);
|
||||
|
||||
// Reclaim any HMM va_blocks that overlap the given range.
|
||||
// Note that 'end' is inclusive.
|
||||
// A HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
|
||||
// See uvm_hmm_vma_is_valid() for details.
|
||||
// Note that 'end' is inclusive. If mm is NULL, any HMM va_block in the
|
||||
// range will be reclaimed which assumes that the mm is being torn down
|
||||
// and was not retained.
|
||||
// Return values:
|
||||
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
|
||||
// NV_OK: There were no HMM blocks in the range, or all HMM
|
||||
// blocks in the range were successfully reclaimed.
|
||||
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
|
||||
// must hold a reference on it, and it must be locked for at least read
|
||||
// mode. Also, the va_space lock must be held in write mode.
|
||||
// must retain it with uvm_va_space_mm_or_current_retain() or be sure that
|
||||
// mm->mm_users is not zero, and it must be locked for at least read mode.
|
||||
// Also, the va_space lock must be held in write mode.
|
||||
// TODO: Bug 3372166: add asynchronous va_block reclaim.
|
||||
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 start,
|
||||
NvU64 end);
|
||||
|
||||
// This is called to update the va_space tree of HMM va_blocks after an
|
||||
// existing va_block is split.
|
||||
// Locking: the va_space lock must be held in write mode.
|
||||
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block);
|
||||
|
||||
// Find a HMM policy range that needs to be split. The callback function
|
||||
// 'split_needed_cb' returns true if the policy range needs to be split.
|
||||
// If a policy range is split, the existing range is truncated to
|
||||
@@ -132,7 +186,8 @@ typedef struct
|
||||
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
NvU64 base,
|
||||
NvU64 last_address);
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Set the accessed by policy for the given range. This also tries to
|
||||
// map the range. Note that 'last_address' is inclusive.
|
||||
@@ -142,13 +197,23 @@ typedef struct
|
||||
uvm_processor_id_t processor_id,
|
||||
bool set_bit,
|
||||
NvU64 base,
|
||||
NvU64 last_address);
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Deferred work item to reestablish accessed by mappings after eviction. On
|
||||
// GPUs with access counters enabled, the evicted GPU will also get remote
|
||||
// mappings.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
|
||||
// and the va_space lock must be held in at least read mode.
|
||||
void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *block_context);
|
||||
|
||||
// Set the read duplication policy for the given range.
|
||||
// Note that 'last_address' is inclusive.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
// TODO: Bug 2046423: need to implement read duplication support in Linux.
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
|
||||
uvm_read_duplication_policy_t new_policy,
|
||||
NvU64 base,
|
||||
@@ -159,10 +224,11 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Set va_block_context->policy to the policy covering the given address
|
||||
// 'addr' and update the ending address '*endp' to the minimum of *endp,
|
||||
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
|
||||
// policy range.
|
||||
// This function assigns va_block_context->policy to the policy covering
|
||||
// the given address 'addr' and assigns the ending address '*endp' to the
|
||||
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
|
||||
// ending address of the policy range. Note that va_block_context->hmm.vma
|
||||
// is expected to be initialized before calling this function.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
|
||||
// the va_block lock held.
|
||||
@@ -171,11 +237,11 @@ typedef struct
|
||||
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
|
||||
// Find the VMA for the page index 'page_index',
|
||||
// set va_block_context->policy to the policy covering the given address,
|
||||
// and update the ending page range '*outerp' to the minimum of *outerp,
|
||||
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
|
||||
// policy range.
|
||||
// This function finds the VMA for the page index 'page_index' and assigns
|
||||
// it to va_block_context->vma, sets va_block_context->policy to the policy
|
||||
// covering the given address, and sets the ending page range '*outerp'
|
||||
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
|
||||
// ending address of the policy range, and va_block->end.
|
||||
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
|
||||
@@ -185,6 +251,149 @@ typedef struct
|
||||
uvm_page_index_t page_index,
|
||||
uvm_page_index_t *outerp);
|
||||
|
||||
// Clear thrashing policy information from all HMM va_blocks.
|
||||
// Locking: va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space);
|
||||
|
||||
// Return the expanded region around 'address' limited to the intersection
|
||||
// of va_block start/end, vma start/end, and policy start/end.
|
||||
// va_block_context must not be NULL, va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
|
||||
// va_block_context->policy must be valid.
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
|
||||
// va_space lock must be held in at least read mode, and the va_block lock
|
||||
// held.
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address);
|
||||
|
||||
// Return the logical protection allowed of a HMM va_block for the page at
|
||||
// the given address.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()).
|
||||
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
|
||||
// read mode.
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr);
|
||||
|
||||
// This is called to service a GPU fault.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked,
|
||||
// the va_space read lock must be held, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// This is called to migrate a region within a HMM va_block.
|
||||
// va_block_context must not be NULL and va_block_context->policy and
|
||||
// va_block_context->hmm.vma must be valid.
|
||||
// Locking: the va_block_context->mm must be retained, mmap_lock must be
|
||||
// locked, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause);
|
||||
|
||||
// This is called to migrate an address range of HMM allocations via
|
||||
// UvmMigrate().
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->policy or va_block_context->hmm.vma.
|
||||
//
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
||||
// the va_space read lock must be held.
|
||||
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_migrate_mode_t mode,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// This sets the va_block_context->hmm.src_pfns[] to the ZONE_DEVICE private
|
||||
// PFN for the GPU chunk memory.
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_gpu_chunk_t *gpu_chunk,
|
||||
uvm_va_block_region_t chunk_region);
|
||||
|
||||
// Migrate pages to system memory for the given page mask.
|
||||
// Note that the mmap lock is not held and there is no MM retained.
|
||||
// This must be called after uvm_hmm_va_block_evict_chunk_prep() has
|
||||
// initialized va_block_context->hmm.src_pfns[] for the source GPU physical
|
||||
// PFNs being migrated. Note that the input mask 'pages_to_evict' can be
|
||||
// modified. If any of the evicted pages has the accessed by policy set,
|
||||
// then record that by setting out_accessed_by_set.
|
||||
// Locking: the va_block lock must be locked.
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set);
|
||||
|
||||
// Migrate pages from the given GPU to system memory for the given page
|
||||
// mask and region. va_block_context must not be NULL.
|
||||
// Note that the mmap lock is not held and there is no MM retained.
|
||||
// Locking: the va_block lock must be locked.
|
||||
NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Migrate a GPU chunk to system memory. This called to remove CPU page
|
||||
// table references to device private struct pages for the given GPU after
|
||||
// all other references in va_blocks have been released and the GPU is
|
||||
// in the process of being removed/torn down. Note that there is no mm,
|
||||
// VMA, va_block or any user channel activity on this GPU.
|
||||
NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
|
||||
uvm_gpu_chunk_t *gpu_chunk);
|
||||
|
||||
// This returns what would be the intersection of va_block start/end and
|
||||
// VMA start/end-1 for the given 'lookup_address' if
|
||||
// uvm_hmm_va_block_find_create() was called.
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode and
|
||||
// the va_space lock must be held in at least read mode.
|
||||
NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
NvU64 *startp,
|
||||
NvU64 *endp,
|
||||
UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params);
|
||||
|
||||
// This updates the HMM va_block CPU residency information for a single
|
||||
// page at 'lookup_address' by calling hmm_range_fault(). If 'populate' is
|
||||
// true, the CPU page will be faulted in read/write or read-only
|
||||
// (depending on the permission of the underlying VMA at lookup_address).
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode and
|
||||
// the va_space lock must be held in at least read mode.
|
||||
NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
bool populate);
|
||||
|
||||
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp);
|
||||
|
||||
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params);
|
||||
|
||||
// Return true if GPU fault new residency location should be system memory.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()).
|
||||
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is
|
||||
// implemented for VMAs other than anonymous memory.
|
||||
// Locking: the va_block lock must be held. If the va_block is a HMM
|
||||
// va_block, the va_block_context->mm must be retained and locked for least
|
||||
// read.
|
||||
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context);
|
||||
|
||||
#else // UVM_IS_CONFIG_HMM()
|
||||
|
||||
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
|
||||
@@ -202,12 +411,17 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
|
||||
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
{
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
static void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -218,6 +432,18 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
|
||||
uvm_service_block_context_t *service_context,
|
||||
struct vm_fault *vmf,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
@@ -226,6 +452,23 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 start,
|
||||
@@ -234,6 +477,10 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_policy_is_split_needed_t split_needed_cb,
|
||||
@@ -245,7 +492,8 @@ typedef struct
|
||||
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
NvU64 base,
|
||||
NvU64 last_address)
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
@@ -254,11 +502,18 @@ typedef struct
|
||||
uvm_processor_id_t processor_id,
|
||||
bool set_bit,
|
||||
NvU64 base,
|
||||
NvU64 last_address)
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *block_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
|
||||
uvm_read_duplication_policy_t new_policy,
|
||||
NvU64 base,
|
||||
@@ -282,6 +537,124 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address)
|
||||
{
|
||||
return (uvm_va_block_region_t){};
|
||||
}
|
||||
|
||||
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr)
|
||||
{
|
||||
return UVM_PROT_NONE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_service_block_context_t *service_context)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_migrate_mode_t mode,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_gpu_chunk_t *gpu_chunk,
|
||||
uvm_va_block_region_t chunk_region)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
|
||||
uvm_gpu_chunk_t *gpu_chunk)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
NvU64 *startp,
|
||||
NvU64 *endp,
|
||||
UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
bool populate)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp)
|
||||
{
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
#endif // _UVM_HMM_H_
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_hmm.h"
|
||||
|
||||
NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
struct mm_struct *mm;
|
||||
uvm_va_block_t *hmm_block = NULL;
|
||||
NV_STATUS status;
|
||||
|
||||
mm = uvm_va_space_mm_retain(va_space);
|
||||
if (!mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
uvm_down_write_mmap_lock(mm);
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
|
||||
// By default, HMM is enabled system wide but disabled per va_space.
|
||||
// This will initialize the va_space for HMM.
|
||||
status = uvm_hmm_va_space_initialize_test(va_space);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
|
||||
uvm_down_read_mmap_lock(mm);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Try to create an HMM va_block to virtual address zero (NULL).
|
||||
// It should fail. There should be no VMA but a va_block for range
|
||||
// [0x0 0x1fffff] is possible.
|
||||
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
|
||||
|
||||
// Try to create an HMM va_block which overlaps a UVM managed block.
|
||||
// It should fail.
|
||||
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
|
||||
|
||||
// Try to create an HMM va_block; it should succeed.
|
||||
status = uvm_hmm_va_block_find_create(va_space, params->hmm_address, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
// Try to find an existing HMM va_block; it should succeed.
|
||||
status = uvm_hmm_va_block_find(va_space, params->hmm_address, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_up_read_mmap_lock(mm);
|
||||
uvm_va_space_mm_release(va_space);
|
||||
|
||||
return status;
|
||||
|
||||
out:
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
uvm_va_space_mm_release(va_space);
|
||||
|
||||
return status;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user