Compare commits

...

34 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Bernhard Stoeckner | ef65a13097 | 535.288.01 | 2026-01-13 18:04:57 +01:00 |
| Maneet Singh | 66ab8e8596 | 535.274.02 | 2025-09-30 12:40:20 -07:00 |
| Bernhard Stoeckner | 9c67f19366 | 535.261.03 | 2025-07-17 17:13:07 +02:00 |
| Bernhard Stoeckner | f468568958 | 535.247.01 | 2025-04-17 17:45:32 +02:00 |
| Bernhard Stoeckner | 855c3c9d3c | 535.230.02 | 2025-01-16 17:34:27 +01:00 |
| Bernhard Stoeckner | 8845de1ce4 | 535.216.03 | 2024-11-19 17:42:03 +01:00 |
| Bernhard Stoeckner | 60d85c464b | 535.216.01 | 2024-10-22 17:35:00 +02:00 |
| Bernhard Stoeckner | c588c3877f | 535.183.06 | 2024-07-09 17:24:25 +02:00 |
| Bernhard Stoeckner | 4459285b60 | 535.183.01 | 2024-06-04 10:45:14 +02:00 |
| Gaurav Juvekar | f4bdce9a0a | 535.179 | 2024-05-08 08:14:09 -07:00 |
| Bernhard Stoeckner | c042c7903d | 535.171.04 | 2024-03-21 14:23:59 +01:00 |
| Bernhard Stoeckner | 044f70bbb8 | 535.161.08 | 2024-03-18 17:57:23 +01:00 |
| Bernhard Stoeckner | 6d33efe502 | 535.161.07 | 2024-02-22 17:28:26 +01:00 |
| Bernhard Stoeckner | ee55481a49 | 535.154.05 | 2024-01-16 14:59:49 +01:00 |
| Bernhard Stoeckner | 7165299dee | 535.146.02 | 2023-12-07 15:10:34 +01:00 |
| Bernhard Stoeckner | e573018659 | 535.129.03 | 2023-10-31 14:22:38 +01:00 |
| Maneet Singh | f59818b751 | 535.113.01 | 2023-09-21 10:43:43 -07:00 |
| Bernhard Stoeckner | a8e01be6b2 | 535.104.05 | 2023-08-22 15:09:37 +02:00 |
| Bernhard Stoeckner | 12c0739352 | 535.98 | 2023-08-08 18:28:38 +02:00 |
| Bernhard Stoeckner | 29f830f1bb | 535.86.10 | 2023-07-31 18:17:14 +02:00 |
| Bernhard Stoeckner | 337e28efda | 535.86.05 | 2023-07-18 16:00:22 +02:00 |
| Bernhard Stoeckner | 22a077c4fe | issue template: be clearer about issues with prop driver | 2023-07-10 15:58:02 +02:00 |
| Andy Ritger | 26458140be | 535.54.03 | 2023-06-14 12:37:59 -07:00 |
| Andy Ritger | eb5c7665a1 | 535.43.02 | 2023-05-30 10:11:36 -07:00 |
| Andy Ritger | 6dd092ddb7 | 530.41.03 | 2023-03-23 11:00:12 -07:00 |
| Andy Ritger | 4397463e73 | 530.30.02 | 2023-02-28 11:12:44 -08:00 |
| Andy Ritger | e598191e8e | 525.89.02 | 2023-02-08 10:15:15 -08:00 |
| Maneet Singh | 1dc88ff75e | 525.85.12 | 2023-01-30 16:30:12 -08:00 |
| Andy Ritger | 811073c51e | 525.85.05 | 2023-01-19 10:41:59 -08:00 |
| Andy Ritger | dac2350c7f | 525.78.01 | 2023-01-05 10:40:27 -08:00 |
| Andy Ritger | 9594cc0169 | 525.60.13 | 2022-12-05 10:49:53 -08:00 |
| Andy Ritger | 5f40a5aee5 | 525.60.11 | 2022-11-28 13:39:27 -08:00 |
| Andy Ritger | 758b4ee818 | 525.53 | 2022-11-10 08:39:33 -08:00 |
| Andy Ritger | 7c345b838b | 520.56.06 | 2022-10-12 10:30:46 -07:00 |
2193 changed files with 562502 additions and 131556 deletions

View File

@@ -1,5 +1,8 @@
name: Report a functional bug 🐛
description: Functional bugs affect operation or stability of the driver and/or hardware.
description: |
Functional bugs affect operation or stability of the driver or hardware.
Bugs with the closed source driver must be reported on the forums (see link on New Issue page below).
labels:
- "bug"
body:
@@ -18,14 +21,12 @@ body:
description: "Which open-gpu-kernel-modules version are you running? Be as specific as possible: SHA is best when built from specific commit."
validations:
required: true
- type: dropdown
- type: checkboxes
id: sw_driver_proprietary
attributes:
label: "Does this happen with the proprietary driver (of the same version) as well?"
label: "Please confirm this issue does not happen with the proprietary driver (of the same version). This issue tracker is only for bugs specific to the open kernel driver."
options:
- "Yes"
- "No"
- "I cannot test this"
- label: "I confirm that this does not happen with the proprietary driver package."
validations:
required: true
- type: input
@@ -42,6 +43,14 @@ body:
description: "Which kernel are you running? (output of `uname -a`, say if you built it yourself)"
validations:
required: true
- type: checkboxes
id: sw_host_kernel_stable
attributes:
label: "Please confirm you are running a stable release kernel (e.g. not a -rc). We do not accept bug reports for unreleased kernels."
options:
- label: "I am running on a stable kernel release."
validations:
required: true
- type: input
id: hw_gpu_type
attributes:
@@ -78,7 +87,10 @@ body:
id: bug_report_gz
attributes:
label: nvidia-bug-report.log.gz
description: "Please reproduce the problem, after that run `nvidia-bug-report.sh`, and attach the resulting nvidia-bug-report.log.gz here."
description: |
Please reproduce the problem, after that run `nvidia-bug-report.sh`, and attach the resulting nvidia-bug-report.log.gz here.
Reports without this file will be closed.
placeholder: You can usually just drag & drop the file into this textbox.
validations:
required: true

View File

@@ -1,14 +1,14 @@
blank_issues_enabled: false
contact_links:
- name: Report a bug with the proprietary driver
url: https://forums.developer.nvidia.com/c/gpu-graphics/linux/148
about: Bugs that aren't specific to the open source driver in this repository must be reported with the linked forums instead.
- name: Report a cosmetic issue
url: https://github.com/NVIDIA/open-gpu-kernel-modules/discussions/categories/general
about: We are not currently accepting cosmetic-only changes such as whitespace, typos, or simple renames. You can still discuss and collect them on the boards.
- name: Ask a question
url: https://github.com/NVIDIA/open-gpu-kernel-modules/discussions/categories/q-a
about: Unsure of what to click, where to go, what the process for your thing is? We're happy to help. Click to visit the discussion board and say hello!
- name: Report a bug with the proprietary driver
url: https://forums.developer.nvidia.com/c/gpu-graphics/linux/148
about: Bugs that aren't specific to the open source driver in this repository should be reported with the linked forums instead. If you are unsure on what kind of bug you have, feel free to open a thread in Discussions. We're here to help!
- name: Suggest a feature
url: https://github.com/NVIDIA/open-gpu-kernel-modules/discussions/categories/ideas
about: Please do not open Issues for feature requests; instead, suggest and discuss new features on the GitHub discussion board. If you have a feature you worked on and want to PR it, please also open a discussion before doing so.

View File

@@ -1,65 +0,0 @@
# Changelog
## Release 520 Entries
### [520.61.05] 2022-10-10
#### Added
- Introduce support for NVIDIA H100 GPUs.
#### Fixed
- Fix/Improve Makefile, [#308](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/308/) by @izenynn
- Make nvLogBase2 more efficient, [#177](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/177/) by @DMaroo
- nv-pci: fixed always true expression, [#195](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/195/) by @ValZapod
## Release 515 Entries
### [515.76] 2022-09-20
#### Fixed
- Improved compatibility with new Linux kernel releases
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
### [515.65.01] 2022-08-02
#### Fixed
- Collection of minor fixes to issues, [#6](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/61) by @Joshua-Ashton
- Remove unnecessary use of acpi_bus_get_device().
### [515.57] 2022-06-28
#### Fixed
- Backtick is deprecated, [#273](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/273) by @arch-user-france1
### [515.48.07] 2022-05-31
#### Added
- List of compatible GPUs in README.md.
#### Fixed
- Fix various README capitalizations, [#8](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/8) by @27lx
- Automatically tag bug report issues, [#15](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/15) by @thebeanogamer
- Improve conftest.sh Script, [#37](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/37) by @Nitepone
- Update HTTP link to HTTPS, [#101](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/101) by @alcaparra
- moved array sanity check to before the array access, [#117](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/117) by @RealAstolfo
- Fixed some typos, [#122](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/122) by @FEDOyt
- Fixed capitalization, [#123](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/123) by @keroeslux
- Fix typos in NVDEC Engine Descriptor, [#126](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/126) from @TrickyDmitriy
- Extranous apostrohpes in a makefile script [sic], [#14](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/14) by @kiroma
- HDMI no audio @ 4K above 60Hz, [#75](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/75) by @adolfotregosa
- dp_configcaps.cpp:405: array index sanity check in wrong place?, [#110](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/110) by @dcb314
- NVRM kgspInitRm_IMPL: missing NVDEC0 engine, cannot initialize GSP-RM, [#116](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/116) by @kfazz
- ERROR: modpost: "backlight_device_register" [...nvidia-modeset.ko] undefined, [#135](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/135) by @sndirsch
- aarch64 build fails, [#151](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/151) by @frezbo
### [515.43.04] 2022-05-11
- Initial release.

README.md
View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 520.61.05.
version 535.288.01.
## How to Build
@@ -15,9 +15,9 @@ as root:
make modules_install -j$(nproc)
Note that the kernel modules built here must be used with gsp.bin
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
520.61.05 driver release. This can be achieved by installing
535.288.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@@ -162,12 +162,25 @@ for the target kernel.
- `src/nvidia/` The OS-agnostic code for nvidia.ko
- `src/nvidia-modeset/` The OS-agnostic code for nvidia-modeset.ko
- `src/common/` Utility code used by one or more of nvidia.ko and nvidia-modeset.ko
- `nouveau/` Tools for integration with the Nouveau device driver
## Nouveau device driver integration
The Python script in the 'nouveau' directory is used to extract some of the
firmware binary images (and related data) encoded in the source code and
store them as distinct files. These files are used by the Nouveau device
driver to load and communicate with the GSP firmware.
The layout of the binary files is described in nouveau_firmware_layout.ods,
which is an OpenDocument Spreadsheet file, compatible with most spreadsheet
software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 520.61.05 release,
(see the table below). However, in the 535.288.01 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/520.61.05/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.288.01/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -635,6 +648,7 @@ Subsystem Device ID.
| NVIDIA T1000 8GB | 1FF0 17AA 1612 |
| NVIDIA T400 4GB | 1FF2 1028 1613 |
| NVIDIA T400 4GB | 1FF2 103C 1613 |
| NVIDIA T400E | 1FF2 103C 18FF |
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
@@ -645,13 +659,32 @@ Subsystem Device ID.
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
| NVIDIA PG506-242 | 20B3 10DE 14A7 |
| NVIDIA PG506-243 | 20B3 10DE 14A8 |
| NVIDIA PG509-210 | 20B2 10DE 1625 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A30 | 20B7 10DE 1804 |
| NVIDIA A30 | 20B7 10DE 1852 |
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
| NVIDIA A800 40GB Active | 20F6 1028 180A |
| NVIDIA A800 40GB Active | 20F6 103C 180A |
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
| NVIDIA AX800 | 20FD 10DE 17F8 |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
@@ -685,6 +718,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 3090 Ti | 2203 |
| NVIDIA GeForce RTX 3090 | 2204 |
| NVIDIA GeForce RTX 3080 | 2206 |
| NVIDIA GeForce RTX 3070 Ti | 2207 |
| NVIDIA GeForce RTX 3080 Ti | 2208 |
| NVIDIA GeForce RTX 3080 | 220A |
| NVIDIA CMP 90HX | 220D |
@@ -709,6 +743,21 @@ Subsystem Device ID.
| NVIDIA A10 | 2236 10DE 1482 |
| NVIDIA A10G | 2237 10DE 152F |
| NVIDIA A10M | 2238 10DE 1677 |
| NVIDIA H100 NVL | 2321 10DE 1839 |
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H20-3e | 232C 10DE 2063 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |
@@ -736,6 +785,8 @@ Subsystem Device ID.
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
| NVIDIA RTX A4500 Laptop GPU | 24BA |
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
| NVIDIA GeForce RTX 3060 | 24C7 |
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
@@ -751,12 +802,15 @@ Subsystem Device ID.
| NVIDIA RTX A2000 | 2531 103C 151D |
| NVIDIA RTX A2000 | 2531 10DE 151D |
| NVIDIA RTX A2000 | 2531 17AA 151D |
| NVIDIA GeForce RTX 3060 | 2544 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 | 2584 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
@@ -769,14 +823,80 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 2050 | 25A7 |
| NVIDIA GeForce RTX 2050 | 25A9 |
| NVIDIA GeForce MX570 A | 25AA |
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
| NVIDIA GeForce RTX 2050 | 25AD |
| NVIDIA A16 | 25B6 10DE 14A9 |
| NVIDIA A2 | 25B6 10DE 157E |
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
| NVIDIA RTX A500 Laptop GPU | 25BB |
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
| NVIDIA RTX A500 Laptop GPU | 25BD |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
| NVIDIA GeForce RTX 2050 | 25ED |
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
| NVIDIA RTX A2000 Embedded GPU | 25FA |
| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA L40 | 26B5 10DE 169D |
| NVIDIA L40 | 26B5 10DE 17DA |
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
| NVIDIA GeForce RTX 4070 Ti | 2782 |
| NVIDIA GeForce RTX 4070 | 2786 |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
| NVIDIA L2 | 27B6 10DE 1933 |
| NVIDIA L4 | 27B8 10DE 16CA |
| NVIDIA L4 | 27B8 10DE 16EE |
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
| NVIDIA GeForce RTX 4060 Ti | 2803 |
| NVIDIA GeForce RTX 4060 Ti | 2805 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BA |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |

View File

@@ -57,67 +57,82 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
UBSAN_SANITIZE := y
endif
#
# Command to create a symbolic link, explicitly resolving the symlink target
# to an absolute path to abstract away the difference between Linux < 6.13,
# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
# Linux >= 6.13, where the CWD is the external module source tree.
#
# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
# kernel modules which use precompiled binary object files.
#
quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $(abspath $<) $@
$(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(src)/$(_module)/$(_module).Kbuild))
#
# Define CFLAGS that apply to all the NVIDIA kernel modules. EXTRA_CFLAGS
# is deprecated since 2.6.24 in favor of ccflags-y, but we need to support
# older kernels which do not have ccflags-y. Newer kernels append
# $(EXTRA_CFLAGS) to ccflags-y for compatibility.
#
ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"535.288.01\"
EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.61.05\"
EXTRA_CFLAGS += -Wno-unused-function
ifneq ($(NV_BUILD_TYPE),debug)
EXTRA_CFLAGS += -Wuninitialized
ifneq ($(SYSSRCHOST1X),)
ccflags-y += -I$(SYSSRCHOST1X)
endif
EXTRA_CFLAGS += -fno-strict-aliasing
ccflags-y += -Wno-unused-function
ifneq ($(NV_BUILD_TYPE),debug)
ccflags-y += -Wuninitialized
endif
ccflags-y += -fno-strict-aliasing
ifeq ($(ARCH),arm64)
EXTRA_CFLAGS += -mstrict-align
ccflags-y += -mstrict-align
endif
ifeq ($(NV_BUILD_TYPE),debug)
EXTRA_CFLAGS += -g -gsplit-dwarf
ccflags-y += -g
ccflags-y += $(call cc-option,-gsplit-dwarf,)
endif
EXTRA_CFLAGS += -ffreestanding
ccflags-y += -ffreestanding
ifeq ($(ARCH),arm64)
EXTRA_CFLAGS += -mgeneral-regs-only -march=armv8-a
EXTRA_CFLAGS += $(call cc-option,-mno-outline-atomics,)
ccflags-y += -mgeneral-regs-only -march=armv8-a
ccflags-y += $(call cc-option,-mno-outline-atomics,)
endif
ifeq ($(ARCH),x86_64)
EXTRA_CFLAGS += -mno-red-zone -mcmodel=kernel
ccflags-y += -mno-red-zone -mcmodel=kernel
endif
ifeq ($(ARCH),powerpc)
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align -mno-altivec
ccflags-y += -mlittle-endian -mno-strict-align -mno-altivec
endif
EXTRA_CFLAGS += -DNV_UVM_ENABLE
EXTRA_CFLAGS += $(call cc-option,-Werror=undef,)
EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER
ccflags-y += -DNV_UVM_ENABLE
ccflags-y += $(call cc-option,-Werror=undef,)
ccflags-y += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
ccflags-y += -DNV_KERNEL_INTERFACE_LAYER
#
# Detect SGI UV systems and apply system-specific optimizations.
#
ifneq ($(wildcard /proc/sgi_uv),)
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
ccflags-y += -DNV_CONFIG_X86_UV
endif
ifdef VGX_FORCE_VFIO_PCI_CORE
ccflags-y += -DNV_VGPU_FORCE_VFIO_PCI_CORE
endif
#
# The conftest.sh script tests various aspects of the target kernel.
@@ -143,7 +158,11 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(ccflags-y) -fno-pie
NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h
@@ -212,8 +231,10 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_auth.h \
drm/drm_gem.h \
drm/drm_crtc.h \
drm/drm_color_mgmt.h \
drm/drm_atomic.h \
drm/drm_atomic_helper.h \
drm/drm_atomic_state_helper.h \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
@@ -229,20 +250,24 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
drm/drm_client_setup.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/aperture.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
linux/semaphore.h \
linux/printk.h \
linux/ratelimit.h \
linux/prio_tree.h \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched.h \
linux/sched/mm.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
@@ -256,6 +281,7 @@ NV_HEADER_PRESENCE_TESTS = \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
linux/host1x-next.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \
@@ -265,6 +291,7 @@ NV_HEADER_PRESENCE_TESTS = \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
@@ -286,7 +313,14 @@ NV_HEADER_PRESENCE_TESTS = \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h
asm/coco.h \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h \
linux/cc_platform.h \
asm/cpufeature.h \
crypto/sig.h
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.

View File

@@ -28,7 +28,7 @@ else
else
KERNEL_UNAME ?= $(shell uname -r)
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
KERNEL_SOURCES := $(shell test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source || echo $(KERNEL_MODLIB)/build)
KERNEL_SOURCES := $(shell ((test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source) || (test -d $(KERNEL_MODLIB)/build/source && echo $(KERNEL_MODLIB)/build/source)) || echo $(KERNEL_MODLIB)/build)
endif
KERNEL_OUTPUT := $(KERNEL_SOURCES)
@@ -42,12 +42,32 @@ else
else
KERNEL_UNAME ?= $(shell uname -r)
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
ifeq ($(KERNEL_SOURCES), $(KERNEL_MODLIB)/source)
# $(filter pattern...,text) - Returns all whitespace-separated words in text that
# do match any of the pattern words, removing any words that do not match.
# Set the KERNEL_OUTPUT only if either $(KERNEL_MODLIB)/source or
# $(KERNEL_MODLIB)/build/source path matches the KERNEL_SOURCES.
ifneq ($(filter $(KERNEL_SOURCES),$(KERNEL_MODLIB)/source $(KERNEL_MODLIB)/build/source),)
KERNEL_OUTPUT := $(KERNEL_MODLIB)/build
KBUILD_PARAMS := KBUILD_OUTPUT=$(KERNEL_OUTPUT)
endif
endif
# If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
# Look for the compiler specified there, and use it by default, if found.
ifeq ($(origin CC),default)
cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
echo "$$CONFIG_CC_VERSION_TEXT"))
ifneq ($(cc_version_text),)
ifeq ($(shell command -v $(cc_version_text)),)
$(warning WARNING: Unable to locate the compiler $(cc_version_text) \
from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
else
CC=$(cc_version_text)
endif
endif
endif
CC ?= cc
LD ?= ld
OBJDUMP ?= objdump
@@ -60,6 +80,16 @@ else
)
endif
KERNEL_ARCH = $(ARCH)
ifneq ($(filter $(ARCH),i386 x86_64),)
KERNEL_ARCH = x86
else
ifeq ($(filter $(ARCH),arm64 powerpc),)
$(error Unsupported architecture $(ARCH))
endif
endif
NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
$(NV_KERNEL_MODULES))
@@ -99,8 +129,9 @@ else
# module symbols on which the Linux kernel's module resolution is dependent
# and hence must be used whenever present.
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
$(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \
$(KERNEL_OUTPUT)/scripts/module.lds
NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))

View File

@@ -242,7 +242,7 @@
#endif
/* For verification-only features not intended to be included in normal drivers */
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
#if defined(ENABLE_VERIF_FEATURES)
#define NV_VERIF_FEATURES
#endif
@@ -276,12 +276,6 @@
#define NV_IS_MODS 0
#endif
#if defined(NV_GSP_MODS)
#define NV_IS_GSP_MODS 1
#else
#define NV_IS_GSP_MODS 0
#endif
#if defined(NV_WINDOWS)
#define NVOS_IS_WINDOWS 1
#else

View File

@@ -0,0 +1,83 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
//
// This file holds GPU firmware related registry key definitions that are
// shared between Windows and Unix
//
#ifndef NV_FIRMWARE_REGISTRY_H
#define NV_FIRMWARE_REGISTRY_H
//
// Registry key that, when enabled, will enable use of GPU firmware.
//
// Possible mode values:
// 0 - Do not enable GPU firmware
// 1 - Enable GPU firmware
// 2 - (Default) Use the default enablement policy for GPU firmware
//
// Setting this to anything other than 2 will alter driver firmware-
// enablement policies, possibly disabling GPU firmware where it would
// have otherwise been enabled by default.
//
// Policy bits:
//
// POLICY_ALLOW_FALLBACK:
// As the normal behavior is to fail GPU initialization if this registry
// entry is set in such a way that results in an invalid configuration, if
// instead the user would like the driver to automatically try to fallback
// to initializing the failing GPU with firmware disabled, then this bit can
// be set (ex: 0x11 means try to enable GPU firmware but fall back if needed).
// Note that this can result in a mixed mode configuration (ex: GPU0 has
// firmware enabled, but GPU1 does not).
//
#define NV_REG_STR_ENABLE_GPU_FIRMWARE "EnableGpuFirmware"
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_MASK 0x0000000F
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DISABLED 0x00000000
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_ENABLED 0x00000001
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DEFAULT 0x00000002
#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_MASK 0x000000F0
#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_ALLOW_FALLBACK 0x00000010
#define NV_REG_ENABLE_GPU_FIRMWARE_DEFAULT_VALUE 0x00000012
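As a side note (not part of the diff), the mode and policy fields of this key can be decoded with the masks above; a minimal standalone sketch, with the two mask values copied from the defines:

/* Sketch: decode an EnableGpuFirmware value such as 0x11, which per the
 * comment above means "try to enable GPU firmware, fall back if needed". */
#include <stdio.h>

#define MODE_MASK             0x0000000F
#define POLICY_ALLOW_FALLBACK 0x00000010

int main(void)
{
    unsigned int reg = 0x11;
    printf("mode=%u allow_fallback=%u\n",
           reg & MODE_MASK,                          /* -> 1 (enabled) */
           (reg & POLICY_ALLOW_FALLBACK) ? 1u : 0u); /* -> 1 (fallback) */
    return 0;
}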
//
// Registry key that, when enabled, will send GPU firmware logs
// to the system log, when possible.
//
// Possible values:
// 0 - Do not send GPU firmware logs to the system log
// 1 - Enable sending of GPU firmware logs to the system log
// 2 - (Default) Enable sending of GPU firmware logs to the system log for
// the debug kernel driver build only
//
#define NV_REG_STR_ENABLE_GPU_FIRMWARE_LOGS "EnableGpuFirmwareLogs"
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_DISABLE 0x00000000
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE 0x00000001
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE_ON_DEBUG 0x00000002
#endif // NV_FIRMWARE_REGISTRY_H

View File

@@ -0,0 +1,132 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NV_FIRMWARE_H
#define NV_FIRMWARE_H
#include <nvtypes.h>
#include <nvmisc.h>
typedef enum
{
NV_FIRMWARE_TYPE_GSP,
NV_FIRMWARE_TYPE_GSP_LOG
} nv_firmware_type_t;
typedef enum
{
NV_FIRMWARE_CHIP_FAMILY_NULL = 0,
NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;
static inline const char *nv_firmware_chip_family_to_string(
nv_firmware_chip_family_t fw_chip_family
)
{
switch (fw_chip_family) {
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return NULL;
}
return NULL;
}
// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
//
// The function nv_firmware_path will then be available.
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
static inline const char *nv_firmware_path(
nv_firmware_type_t fw_type,
nv_firmware_chip_family_t fw_chip_family
)
{
if (fw_type == NV_FIRMWARE_TYPE_GSP)
{
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return "";
}
}
else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
{
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return "";
}
}
return "";
}
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)
// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
// which will then be invoked (at the top-level) for each
// gsp_*.bin (but not gsp_log_*.bin)
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
#endif // NV_FIRMWARE_H
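For orientation, a hedged sketch of how an includer such as nv.c might use these hooks; the "nvidia/" path prefix is an assumption for illustration, not taken from the diff:

/* Hypothetical includer: define the hook before including the header so
 * nv_firmware_path() is compiled in. String-literal concatenation builds
 * the final path; "nvidia/" is an assumed example prefix. */
#define NV_FIRMWARE_PATH_FOR_FILENAME(filename) ("nvidia/" filename)
#include "nv-firmware.h"

/* nv_firmware_path(NV_FIRMWARE_TYPE_GSP, NV_FIRMWARE_CHIP_FAMILY_GA10X)
 * then returns "nvidia/gsp_ga10x.bin". */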

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
* @key: the key of the objects to iterate over
*/
#define nv_hash_for_each_possible(name, obj, member, key) \
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
#endif // __NV_HASH_H__

View File

@@ -27,24 +27,21 @@
#include <nv-kernel-interface-api.h>
// Enums for supported hypervisor types.
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
typedef enum _HYPERVISOR_TYPE
{
OS_HYPERVISOR_XEN = 0,
OS_HYPERVISOR_VMWARE,
OS_HYPERVISOR_HYPERV,
OS_HYPERVISOR_KVM,
OS_HYPERVISOR_PARALLELS,
OS_HYPERVISOR_CUSTOM_FORCED,
OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;
#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
#define CMD_VGPU_VFIO_PRESENT 3
#define CMD_VFIO_WAKE_REMOVE_GPU 1
#define CMD_VGPU_VFIO_PRESENT 2
#define CMD_VFIO_PCI_CORE_PRESENT 3
#define MAX_VF_COUNT_PER_GPU 64
#define MAX_VF_COUNT_PER_GPU 64
typedef enum _VGPU_TYPE_INFO
{
@@ -55,16 +52,11 @@ typedef enum _VGPU_TYPE_INFO
typedef struct
{
void *vgpuVfioRef;
void *waitQueue;
void *nv;
NvU32 *vgpuTypeIds;
NvU32 numVgpuTypes;
NvU32 domain;
NvU8 bus;
NvU8 slot;
NvU8 function;
NvBool is_virtfn;
NvU32 domain;
NvU32 bus;
NvU32 device;
NvU32 return_status;
} vgpu_vfio_info;
typedef struct
@@ -92,30 +84,6 @@ typedef enum VGPU_DEVICE_STATE_E
NV_VGPU_DEV_IN_USE = 2
} VGPU_DEVICE_STATE;
typedef enum _VMBUS_CMD_TYPE
{
VMBUS_CMD_TYPE_INVALID = 0,
VMBUS_CMD_TYPE_SETUP = 1,
VMBUS_CMD_TYPE_SENDPACKET = 2,
VMBUS_CMD_TYPE_CLEANUP = 3,
} VMBUS_CMD_TYPE;
typedef struct
{
NvU32 request_id;
NvU32 page_count;
NvU64 *pPfns;
void *buffer;
NvU32 bufferlen;
} vmbus_send_packet_cmd_params;
typedef struct
{
NvU32 override_sint;
NvU8 *nv_guid;
} vmbus_setup_cmd_params;
/*
* Function prototypes
*/

View File

@@ -62,6 +62,7 @@ typedef struct nv_ioctl_numa_info
uint64_t memblock_size __aligned(8);
uint64_t numa_mem_addr __aligned(8);
uint64_t numa_mem_size __aligned(8);
uint8_t use_auto_online;
nv_offline_addresses_t offline_addresses __aligned(8);
} nv_ioctl_numa_info_t;

View File

@@ -104,7 +104,7 @@ typedef struct nv_ioctl_rm_api_version
#define NV_RM_API_VERSION_CMD_STRICT 0
#define NV_RM_API_VERSION_CMD_RELAXED '1'
#define NV_RM_API_VERSION_CMD_OVERRIDE '2'
#define NV_RM_API_VERSION_CMD_QUERY '2'
#define NV_RM_API_VERSION_REPLY_UNRECOGNIZED 0
#define NV_RM_API_VERSION_REPLY_RECOGNIZED 1

View File

@@ -28,15 +28,10 @@
#include <linux/list.h> // list
#include <linux/sched.h> // task_struct
#include <linux/numa.h> // NUMA_NO_NODE
#include <linux/semaphore.h>
#include "conftest.h"
#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
#include <linux/semaphore.h>
#else
#include <asm/semaphore.h>
#endif
////////////////////////////////////////////////////////////////////////////////
// nv_kthread_q:
//
@@ -115,11 +110,6 @@ struct nv_kthread_q_item
void *function_args;
};
#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
#else
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
#endif
#ifndef NUMA_NO_NODE
#define NUMA_NO_NODE (-1)
@@ -142,18 +132,12 @@ struct nv_kthread_q_item
//
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
//
// The kernel thread stack is preferably allocated on the specified NUMA node if
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
// fallback to another node is possible because kernel allocators do not
// The kernel thread stack is preferably allocated on the specified NUMA node,
// but fallback to another node is possible because kernel allocators do not
// guarantee affinity. Note that NUMA-affinity applies only to
// the kthread stack. This API does not do anything about limiting the CPU
// affinity of the kthread. That is left to the caller.
//
// On kernels which do not support NUMA-aware kthread stack allocations
// (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
// if the value supplied for 'preferred_node' is anything other than
// NV_KTHREAD_NO_NODE.
//
// Reusing a queue: once a queue is initialized, it must be safely shut down
// (see "Stopping the queue(s)", below), before it can be reused. So, for
// a simple queue use case, the following will work:
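The hunk shown here ends before that example; a hedged sketch of the simple case the comment describes, using the queue lifecycle calls this header declares (exact signatures are assumptions):

/* Sketch only: init, use, stop, then init again to reuse. */
static void simple_queue_example(void)
{
    nv_kthread_q_t q;

    if (nv_kthread_q_init(&q, "nvq") != 0)
        return;                 /* thread creation failed */

    /* ... schedule nv_kthread_q_item work on q ... */

    nv_kthread_q_stop(&q);      /* flush pending items, stop the thread */

    /* q may now be initialized again for reuse. */
}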

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -191,13 +191,6 @@
*/
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
#if !defined(NV_KUID_T_PRESENT)
static inline uid_t __kuid_val(uid_t uid)
{
return uid;
}
#endif
#if defined(CONFIG_VGA_ARB)
#include <linux/vgaarb.h>
#endif
@@ -218,6 +211,7 @@ static inline uid_t __kuid_val(uid_t uid)
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/memory.h>
#include <linux/workqueue.h> /* workqueue */
#include "nv-kthread-q.h" /* kthread based queue */
@@ -234,18 +228,6 @@ static inline uid_t __kuid_val(uid_t uid)
#include <asm-generic/pci-dma-compat.h>
#endif
#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
#else
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
#endif
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
#define NV_EFI_ENABLED() efi_enabled
#else
#define NV_EFI_ENABLED() 0
#endif
#if defined(CONFIG_CRAY_XT)
#include <cray/cray_nvidia.h>
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
@@ -517,11 +499,13 @@ static inline void *nv_vmalloc(unsigned long size)
void *ptr = __vmalloc(size, GFP_KERNEL);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}
static inline void nv_vfree(void *ptr, NvU32 size)
static inline void nv_vfree(void *ptr, NvU64 size)
{
NV_MEMDBG_REMOVE(ptr, size);
vfree(ptr);
@@ -529,9 +513,15 @@ static inline void nv_vfree(void *ptr, NvU32 size)
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
void *ptr = ioremap_driver_hardened(phys, size);
#else
void *ptr = ioremap(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}
@@ -542,11 +532,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_CACHE_PRESENT)
void *ptr = ioremap_cache(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#elif defined(NVCPU_PPC64LE)
//
// ioremap_cache() has been only implemented correctly for ppc64le with
@@ -561,25 +551,34 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
// support on power.
//
void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
#else
return nv_ioremap(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_WC_PRESENT)
void *ptr = ioremap_wc(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
ptr = ioremap_driver_hardened_wc(phys, size);
#elif defined(NV_IOREMAP_WC_PRESENT)
ptr = ioremap_wc(phys, size);
#else
return nv_ioremap_nocache(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}
static inline void nv_iounmap(void *ptr, NvU64 size)
@@ -592,11 +591,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
{
if (node_id < 0 || node_id >= MAX_NUMNODES)
return NV_FALSE;
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
#else
return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
#endif
}
#define NV_KMALLOC(ptr, size) \
@@ -606,6 +601,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KZALLOC(ptr, size) \
{ \
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
if (ptr) \
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KMALLOC_ATOMIC(ptr, size) \
{ \
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
@@ -649,6 +651,26 @@ static NvBool nv_numa_node_has_memory(int node_id)
free_pages(ptr, order); \
}
static inline pgprot_t nv_sme_clr(pgprot_t prot)
{
#if defined(__sme_clr)
return __pgprot(__sme_clr(pgprot_val(prot)));
#else
return prot;
#endif // __sme_clr
}
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(pgprot_decrypted)
return pgprot_decrypted(prot);
#else
return nv_sme_clr(prot);
#endif // pgprot_decrypted
}
#if defined(PAGE_KERNEL_NOENC)
#if defined(__pgprot_mask)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
@@ -670,7 +692,8 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
#if defined(PAGE_KERNEL_NOENC)
if (unencrypted)
{
prot = cached ? PAGE_KERNEL_NOENC : NV_PAGE_KERNEL_NOCACHE_NOENC;
prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
}
else
#endif
@@ -683,7 +706,9 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
/* All memory cached in PPC64LE; can't honor 'cached' input. */
ptr = vmap(pages, page_count, VM_MAP, prot);
if (ptr)
{
NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
}
return (NvUPtr)ptr;
}
@@ -838,18 +863,16 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
})
#endif
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
#endif
#define NV_PRINT_AT(nv_debug_level,at) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %lld, flags = 0x%08x, " \
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
at->num_pages, (long long)atomic64_read(&at->usage_count), \
at->flags, at->page_table); \
}
@@ -957,26 +980,6 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
return ret;
}
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
/*
* When AMD memory encryption is enabled, device memory mappings with the
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
*
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
*/
#if defined(NV_CC_MKDEC_PRESENT)
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
#else
prot = pgprot_decrypted(prot);
#endif
#endif
return prot;
}
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
{
@@ -1139,11 +1142,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
if (sp == NULL)
return -ENOMEM;
sp->size = sizeof(sp->stack);
sp->top = sp->stack + sp->size;
if (rm_is_altstack_in_use())
{
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
if (sp == NULL)
return -ENOMEM;
sp->size = sizeof(sp->stack);
sp->top = sp->stack + sp->size;
}
#endif
*stack = sp;
return 0;
@@ -1152,7 +1158,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
{
#if defined(NVCPU_X86_64)
if (stack != NULL)
if (stack != NULL && rm_is_altstack_in_use())
{
NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
}
@@ -1190,14 +1196,14 @@ typedef struct nvidia_pte_s {
typedef struct nv_alloc_s {
struct nv_alloc_s *next;
struct device *dev;
atomic_t usage_count;
atomic64_t usage_count;
struct {
NvBool contig : 1;
NvBool guest : 1;
NvBool zeroed : 1;
NvBool aliased : 1;
NvBool user : 1;
NvBool node0 : 1;
NvBool node : 1;
NvBool peer_io : 1;
NvBool physical : 1;
NvBool unencrypted : 1;
@@ -1211,6 +1217,7 @@ typedef struct nv_alloc_s {
unsigned int pid;
struct page **user_pages;
NvU64 guest_id; /* id of guest VM */
NvS32 node_id; /* Node id for memory allocation when node is set in flags */
void *import_priv;
struct sg_table *import_sgt;
} nv_alloc_t;
@@ -1323,7 +1330,7 @@ nv_dma_maps_swiotlb(struct device *dev)
* SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
* is active in all cases.
*/
if (os_sev_enabled)
if (os_cc_enabled)
swiotlb_in_use = NV_TRUE;
#endif
@@ -1386,8 +1393,7 @@ typedef struct nv_dma_map_s {
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
* single-page sg elements on Xen Server.
*/
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
!defined(NV_DOM0_KERNEL_PRESENT)
#if !defined(NV_DOM0_KERNEL_PRESENT)
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
((sg_alloc_table_from_pages(&sm->sgt, \
&dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \
@@ -1452,13 +1458,43 @@ struct nv_dma_device {
NvBool nvlink;
};
/* Properties of the coherent link */
typedef struct coherent_link_info_s {
/* Physical Address of the GPU memory in SOC AMAP. In the case of
* a baremetal OS environment it is the System Physical Address (SPA), and in the
* case of a virtualized OS environment it is the Intermediate Physical Address (IPA) */
NvU64 gpu_mem_pa;
/* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
* available for adding GPU memory to the kernel as system RAM */
DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
} coherent_link_info_t;
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
/*
* acpi data storage structure
*
* This structure retains the pointer to the device,
* and any other baggage we want to carry along
*
*/
typedef struct
{
nvidia_stack_t *sp;
struct acpi_device *device;
struct acpi_handle *handle;
void *notifier_data;
int notify_handler_installed;
} nv_acpi_t;
#endif
/* linux-specific version of old nv_state_t */
/* this is a general os-specific state structure. the first element *must* be
the general state structure, for the generic unix-based code */
typedef struct nv_linux_state_s {
nv_state_t nv_state;
atomic_t usage_count;
atomic64_t usage_count;
NvU32 suspend_count;
struct device *dev;
@@ -1467,6 +1503,13 @@ typedef struct nv_linux_state_s {
/* IBM-NPU info associated with this GPU */
nv_ibmnpu_info_t *npu;
/* coherent link information */
coherent_link_info_t coherent_link_info;
/* Dedicated queue to be used for removing FB memory which is onlined
* to kernel as a NUMA node. Refer Bug : 3879845*/
nv_kthread_q_t remove_numa_memory_q;
/* NUMA node information for the platforms where GPU memory is presented
* as a NUMA node to the kernel */
struct {
@@ -1477,6 +1520,7 @@ typedef struct nv_linux_state_s {
/* NUMA online/offline status for platforms that support GPU memory as
* NUMA node */
atomic_t status;
NvBool use_auto_online;
} numa_info;
nvidia_stack_t *sp[NV_DEV_STACK_COUNT];
@@ -1546,8 +1590,13 @@ typedef struct nv_linux_state_s {
/* Per-device notifier block for ACPI events */
struct notifier_block acpi_nb;
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
nv_acpi_t* nv_acpi_object;
#endif
/* Lock serializing ISRs for different SOC vectors */
nv_spinlock_t soc_isr_lock;
void *soc_bh_mutex;
struct nv_timer snapshot_timer;
nv_spinlock_t snapshot_timer_lock;
@@ -1563,6 +1612,10 @@ typedef struct nv_linux_state_s {
struct nv_dma_device dma_dev;
struct nv_dma_device niso_dma_dev;
#if defined(NV_VGPU_KVM_BUILD)
wait_queue_head_t wait;
NvS32 return_status;
#endif
} nv_linux_state_t;
extern nv_linux_state_t *nv_linux_devices;
@@ -1593,24 +1646,6 @@ extern struct rw_semaphore nv_system_pm_lock;
extern NvBool nv_ats_supported;
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
/*
* acpi data storage structure
*
* This structure retains the pointer to the device,
* and any other baggage we want to carry along
*
*/
typedef struct
{
nvidia_stack_t *sp;
struct acpi_device *device;
struct acpi_handle *handle;
int notify_handler_installed;
} nv_acpi_t;
#endif
/*
* file-private data
* hide a pointer to our data structures in a file-private ptr
@@ -1667,6 +1702,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)
static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
if (rm_is_altstack_in_use())
{
down(&nvlfp->fops_sp_lock[which]);
return nvlfp->fops_sp[which];
}
#endif
return NULL;
}
static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
if (rm_is_altstack_in_use())
{
up(&nvlfp->fops_sp_lock[which]);
}
#endif
}
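A hedged usage sketch for the pair above, which tolerates the NULL returned when the altstack is not in use; the entry-point index name is a hypothetical example:

/* Sketch: bracket an entry point with the get/put helpers above.
 * NV_FOPS_STACK_INDEX_IOCTL is a hypothetical nvidia_entry_point_index_t
 * value used purely for illustration. */
static void entry_point_example(nv_linux_file_private_t *nvlfp)
{
    nvidia_stack_t *sp = nv_nvlfp_get_sp(nvlfp, NV_FOPS_STACK_INDEX_IOCTL);

    /* ... call into the RM, passing sp (NULL when the altstack is unused) ... */

    nv_nvlfp_put_sp(nvlfp, NV_FOPS_STACK_INDEX_IOCTL);
}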
#define NV_ATOMIC_READ(data) atomic_read(&(data))
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))
@@ -1739,6 +1795,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 num_probed_nv_devices;
extern NvU32 num_nv_devices;
@@ -1776,9 +1833,9 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
{
NV_PRINT_AT(NV_DBG_MEMINFO, at);
if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
if (atomic64_dec_and_test(&at->usage_count))
{
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
at->next = nvlfp->free_list;
nvlfp->free_list = at;
@@ -1895,20 +1952,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
#endif
#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
#else
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
#endif
#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
static inline void nv_mutex_destroy(struct mutex *lock)
{
@@ -1941,6 +1990,11 @@ static inline int nv_set_numa_status(nv_linux_state_t *nvl, int status)
return 0;
}
static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
{
return nvl->numa_info.use_auto_online;
}
typedef enum
{
NV_NUMA_STATUS_DISABLED = 0,
@@ -2001,4 +2055,7 @@ typedef enum
#include <linux/clk-provider.h>
#endif
#define NV_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL_GPL(symbol)
#define NV_CHECK_EXPORT_SYMBOL(symbol) NV_IS_EXPORT_SYMBOL_PRESENT_##symbol
#endif /* _NV_LINUX_H_ */

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -73,21 +73,4 @@
}
#endif
#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
#define nv_hlist_for_each_entry(pos, head, member) \
hlist_for_each_entry(pos, head, member)
#else
#if !defined(hlist_entry_safe)
#define hlist_entry_safe(ptr, type, member) \
(ptr) ? hlist_entry(ptr, type, member) : NULL
#endif
#define nv_hlist_for_each_entry(pos, head, member) \
for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
pos; \
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
#endif
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT
#endif // __NV_LIST_HELPERS_H__

View File

@@ -29,28 +29,12 @@
#include <linux/spinlock.h>
#include <linux/rwsem.h>
#include <linux/sched.h> /* signal_pending, cond_resched */
#include <linux/semaphore.h>
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
#include <linux/sched/signal.h> /* signal_pending for kernels >= 4.11 */
#endif
#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
#include <linux/semaphore.h>
#else
#include <asm/semaphore.h>
#endif
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
typedef raw_spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) raw_spin_lock_irq(lock)
#define NV_SPIN_UNLOCK_IRQ(lock) raw_spin_unlock_irq(lock)
#define NV_SPIN_LOCK_IRQSAVE(lock,flags) raw_spin_lock_irqsave(lock,flags)
#define NV_SPIN_UNLOCK_IRQRESTORE(lock,flags) raw_spin_unlock_irqrestore(lock,flags)
#define NV_SPIN_LOCK(lock) raw_spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) raw_spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
#else
typedef spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK_INIT(lock) spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) spin_lock_irq(lock)
@@ -60,22 +44,8 @@ typedef spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK(lock) spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
#endif
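A minimal usage sketch of the nv_spinlock_t wrappers above, with hypothetical driver-local names (demo_lock, demo_counter_inc); the point is that one call site compiles against either spinlock flavor:
/* NV_SPIN_LOCK_INIT(&demo_lock) must run once before first use. */
static nv_spinlock_t demo_lock;
static void demo_counter_inc(NvU32 *counter)
{
    unsigned long flags;
    NV_SPIN_LOCK_IRQSAVE(&demo_lock, flags);
    (*counter)++;                      /* section that must not race an ISR */
    NV_SPIN_UNLOCK_IRQRESTORE(&demo_lock, flags);
}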
#if defined(NV_CONFIG_PREEMPT_RT)
#define NV_INIT_SEMA(sema, val) sema_init(sema,val)
#else
#if !defined(__SEMAPHORE_INITIALIZER) && defined(__COMPAT_SEMAPHORE_INITIALIZER)
#define __SEMAPHORE_INITIALIZER __COMPAT_SEMAPHORE_INITIALIZER
#endif
#define NV_INIT_SEMA(sema, val) \
{ \
struct semaphore __sema = \
__SEMAPHORE_INITIALIZER(*(sema), val); \
*(sema) = __sema; \
}
#endif
#define NV_INIT_MUTEX(mutex) NV_INIT_SEMA(mutex, 1)
#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)
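With the legacy __SEMAPHORE_INITIALIZER fallback removed, NV_INIT_MUTEX is now a thin alias for sema_init(sema, 1). A minimal sketch, assuming the standard kernel semaphore API (demo names are hypothetical):
static struct semaphore demo_mutex;
static int demo_init(void)
{
    NV_INIT_MUTEX(&demo_mutex);   /* expands to sema_init(&demo_mutex, 1) */
    return 0;
}
static void demo_critical_section(void)
{
    down(&demo_mutex);
    /* ... exclusive work ... */
    up(&demo_mutex);
}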
static inline int nv_down_read_interruptible(struct rw_semaphore *lock)
{

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,6 +29,34 @@
typedef int vm_fault_t;
#endif
/* pin_user_pages
* Presence of pin_user_pages() also implies the presence of unpin_user_page().
* Both were added in v5.6-rc1.
*
* pin_user_pages() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
*
* Removed vmas parameter from pin_user_pages() by commit 40896a02751
* ("mm/gup: remove vmas parameter from pin_user_pages()")
* in linux-next, expected in v6.5-rc1 (2023-05-17)
*
*/
#include <linux/mm.h>
#include <linux/sched.h>
#if defined(NV_PIN_USER_PAGES_PRESENT)
#if defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS)
#define NV_PIN_USER_PAGES pin_user_pages
#else
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages, vmas) \
pin_user_pages(start, nr_pages, gup_flags, pages)
#endif // NV_PIN_USER_PAGES_HAS_ARGS_VMAS
#define NV_UNPIN_USER_PAGE unpin_user_page
#else
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
#define NV_UNPIN_USER_PAGE put_page
#endif // NV_PIN_USER_PAGES_PRESENT
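A hedged sketch of the pin/unpin pair in use (demo_pin_user_buffer is hypothetical, not part of this header); the same call site works whether or not the running kernel still takes the vmas argument. nv_mmap_get_lock() is the compat helper defined later in this header.
static long demo_pin_user_buffer(unsigned long uaddr, unsigned long nr_pages,
                                 struct page **pages)
{
    long pinned, i;
    down_read(nv_mmap_get_lock(current->mm));   /* gup requires the mmap lock */
    pinned = NV_PIN_USER_PAGES(uaddr, nr_pages, FOLL_WRITE, pages, NULL);
    up_read(nv_mmap_get_lock(current->mm));
    if (pinned <= 0)
        return pinned;
    /* ... DMA to/from the pinned pages ... */
    for (i = 0; i < pinned; i++)
        NV_UNPIN_USER_PAGE(pages[i]);
    return pinned;
}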
/* get_user_pages
*
* The 8-argument version of get_user_pages was deprecated by commit
@@ -45,53 +73,73 @@ typedef int vm_fault_t;
* commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
* replacement of the write and force parameters with gup_flags
*
* Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
* ("mm/gup: remove unused vmas parameter from get_user_pages()")
* in linux-next, expected in v6.5-rc1 (2023-05-17)
*
*/
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
get_user_pages(start, nr_pages, flags, pages)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
#define NV_GET_USER_PAGES get_user_pages
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS_VMAS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
#else
#include <linux/mm.h>
#include <linux/sched.h>
static inline long NV_GET_USER_PAGES(unsigned long start,
unsigned long nr_pages,
int write,
int force,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
{
unsigned int flags = 0;
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
if (write)
flags |= FOLL_WRITE;
if (force)
flags |= FOLL_FORCE;
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
return get_user_pages(current, current->mm, start, nr_pages, flags,
pages, vmas);
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS)
return get_user_pages(start, nr_pages, write, force, pages, vmas);
#else
// remaining definition (NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
return get_user_pages(start, nr_pages, flags, pages, vmas);
#endif
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
return get_user_pages(current, current->mm, start, nr_pages, write,
force, pages, vmas);
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS
}
#endif
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
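The same normalization applies to NV_GET_USER_PAGES: the wrapper always exposes the newest argument list and drops vmas where the kernel no longer accepts it. A hedged sketch (demo_get_one_page is hypothetical):
static long demo_get_one_page(unsigned long uaddr, struct page **page)
{
    long ret;
    down_read(nv_mmap_get_lock(current->mm));
    ret = NV_GET_USER_PAGES(uaddr, 1, 0 /* read-only */, page, NULL);
    up_read(nv_mmap_get_lock(current->mm));
    return ret;   /* caller releases the page with put_page() on success */
}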
/* pin_user_pages_remote
*
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
*
* pin_user_pages_remote() removed 'tsk' parameter by commit
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
* in v5.9-rc1 (2020-08-11).
*
* Removed unused vmas parameter from pin_user_pages_remote() by commit
* 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
* in linux-next, expected in v6.5-rc1 (2023-05-14)
*
*/
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS)
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#elif defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_VMAS)
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
#else
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
pin_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS
#else
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
/*
* get_user_pages_remote() was added by commit 1e9877902dc7
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
*
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
* functions") deprecated the 8-argument version of get_user_pages for the
* non-remote case (calling get_user_pages with current and current->mm).
*
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
* version that uses something other than current and current->mm. Use
* NV_GET_USER_PAGES if you are referring to current and current->mm.
*
* Note that get_user_pages_remote() requires the caller to hold a reference on
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
* This will always be true when using current and current->mm. If the kernel passes
@@ -110,69 +158,66 @@ typedef int vm_fault_t;
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
* all gup code") in v5.9-rc1 (2020-08-11).
*
* Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
* ("mm/gup: remove vmas parameter from get_user_pages_remote()")
* in linux-next, expected in v6.5-rc1 (2023-05-14)
*
*/
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
#else
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
struct mm_struct *mm,
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE_VMAS
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
int write,
int force,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
struct vm_area_struct **vmas,
int *locked)
{
unsigned int flags = 0;
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
if (write)
flags |= FOLL_WRITE;
if (force)
flags |= FOLL_FORCE;
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
pages, vmas);
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
pages, vmas, NULL);
#else
// remaining definition (NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
return get_user_pages_remote(mm, start, nr_pages, flags,
pages, vmas, NULL);
#endif
}
#endif
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
#else
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES_REMOTE get_user_pages
#else
#include <linux/mm.h>
#include <linux/sched.h>
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
struct mm_struct *mm,
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS)
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
int write,
int force,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
struct vm_area_struct **vmas,
int *locked)
{
unsigned int flags = 0;
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
if (write)
flags |= FOLL_WRITE;
if (force)
flags |= FOLL_FORCE;
return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
}
#endif
#endif
#else
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
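NV_GET_USER_PAGES_REMOTE now takes the mm-first, locked-last shape on every kernel. A hedged sketch against a foreign mm (demo_gup_remote is hypothetical; the caller is assumed to hold a reference on mm, as the comment above requires):
static long demo_gup_remote(struct mm_struct *mm, unsigned long uaddr,
                            struct page **pages)
{
    long ret;
    down_read(nv_mmap_get_lock(mm));
    ret = NV_GET_USER_PAGES_REMOTE(mm, uaddr, 1, FOLL_WRITE, pages,
                                   NULL /* vmas */, NULL /* locked */);
    up_read(nv_mmap_get_lock(mm));
    return ret;
}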
/*
* The .virtual_address field was effectively renamed to .address, by these
@@ -247,4 +292,37 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
#endif
}
#define NV_CAN_CALL_VMA_START_WRITE 1
#if !NV_CAN_CALL_VMA_START_WRITE
/*
* Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write().
*/
void nv_vma_start_write(struct vm_area_struct *);
#endif
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) |= flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
vm_flags_set(vma, flags);
#else
vma->vm_flags |= flags;
#endif
}
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
vm_flags_clear(vma, flags);
#else
vma->vm_flags &= ~flags;
#endif
}
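A short sketch of the helpers in a driver mmap() handler (demo_mmap is hypothetical); the helpers pick the right accessor whether vm_flags is writable, const-qualified, or must be preceded by vma_start_write():
static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
    nv_vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
    /* ... establish the actual mapping here ... */
    return 0;
}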
#endif // __NV_MM_H__

View File

@@ -27,16 +27,8 @@
#include <linux/pci.h>
#include "nv-linux.h"
#if defined(NV_DEV_IS_PCI_PRESENT)
#define nv_dev_is_pci(dev) dev_is_pci(dev)
#else
/*
* Non-PCI devices are only supported on kernels which expose the
* dev_is_pci() function. For older kernels, we only support PCI
* devices, hence returning true to take all the PCI code paths.
*/
#define nv_dev_is_pci(dev) (true)
#endif
#define NV_GPU_BAR1 1
#define NV_GPU_BAR3 3
int nv_pci_register_driver(void);
void nv_pci_unregister_driver(void);

View File

@@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
__entry; \
})
/*
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
* Use the older interface instead unless the newer interface is necessary.
*/
#if defined(NV_PROC_REMOVE_PRESENT)
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
proc_mkdir_mode(name, mode, parent)
#else
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
({ \
struct proc_dir_entry *__entry; \
__entry = create_proc_entry(name, mode, parent); \
__entry; \
})
#endif
#define NV_CREATE_PROC_DIR(name,parent) \
({ \
@@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
#define NV_PDE_DATA(inode) PDE_DATA(inode)
#endif
#if defined(NV_PROC_REMOVE_PRESENT)
# define NV_REMOVE_PROC_ENTRY(entry) \
proc_remove(entry);
#else
# define NV_REMOVE_PROC_ENTRY(entry) \
remove_proc_entry(entry->name, entry->parent);
#endif
void nv_procfs_unregister_all(struct proc_dir_entry *entry,
struct proc_dir_entry *delimiter);
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
static int nv_procfs_open_##name( \
struct inode *inode, \

View File

@@ -54,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
void nv_free_system_pages (nv_alloc_t *);
void nv_address_space_init_once (struct address_space *mapping);
int nv_uvm_init (void);
void nv_uvm_exit (void);
NV_STATUS nv_uvm_suspend (void);

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2017-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -63,4 +63,13 @@ static inline void nv_timer_setup(struct nv_timer *nv_timer,
#endif
}
static inline void nv_timer_delete_sync(struct timer_list *timer)
{
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
timer_delete_sync(timer);
#else
del_timer_sync(timer);
#endif
}
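A teardown sketch for the new compat helper (demo names are hypothetical); it resolves to timer_delete_sync() where the kernel exports it and del_timer_sync() otherwise, and in both cases waits for a running callback to finish:
static struct timer_list demo_timer;
static void demo_teardown(void)
{
    nv_timer_delete_sync(&demo_timer);   /* safe even if the timer is idle */
}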
#endif // __NV_TIMER_H__

View File

@@ -40,6 +40,7 @@
#include <nvstatus.h>
#include "nv_stdarg.h"
#include <nv-caps.h>
#include <nv-firmware.h>
#include <nv-ioctl.h>
#include <nvmisc.h>
@@ -160,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_MAUD,
TEGRASOC_WHICH_CLK_AZA_2XBIT,
TEGRASOC_WHICH_CLK_AZA_BIT,
TEGRA234_CLK_MIPI_CAL,
TEGRA234_CLK_UART_FST_MIPI_CAL,
TEGRASOC_WHICH_CLK_MIPI_CAL,
TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
TEGRASOC_WHICH_CLK_SOR0_DIV,
TEGRASOC_WHICH_CLK_DISP_ROOT,
TEGRASOC_WHICH_CLK_HUB_ROOT,
TEGRASOC_WHICH_CLK_PLLA_DISP,
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
TEGRASOC_WHICH_CLK_PLLA,
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
} TEGRASOC_WHICH_CLK;
@@ -304,10 +311,11 @@ typedef struct nv_alloc_mapping_context_s {
typedef enum
{
NV_SOC_IRQ_DISPLAY_TYPE,
NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
NV_SOC_IRQ_DPAUX_TYPE,
NV_SOC_IRQ_GPIO_TYPE,
NV_SOC_IRQ_HDACODEC_TYPE,
NV_SOC_IRQ_TCPC2DISP_TYPE,
NV_SOC_IRQ_INVALID_TYPE
} nv_soc_irq_type_t;
@@ -322,6 +330,7 @@ typedef struct nv_soc_irq_info_s {
NvU32 gpio_num;
NvU32 dpaux_instance;
} irq_data;
NvS32 ref_count;
} nv_soc_irq_info_t;
#define NV_MAX_SOC_IRQS 6
@@ -338,6 +347,12 @@ typedef struct nv_soc_irq_info_s {
/* DMA-capable device data, defined by kernel interface layer */
typedef struct nv_dma_device nv_dma_device_t;
typedef struct nv_phys_addr_range
{
NvU64 addr;
NvU64 len;
} nv_phys_addr_range_t;
typedef struct nv_state_t
{
void *priv; /* private data */
@@ -368,6 +383,7 @@ typedef struct nv_state_t
nv_aperture_t *mipical_regs;
nv_aperture_t *fb, ud;
nv_aperture_t *simregs;
nv_aperture_t *emc_regs;
NvU32 num_dpaux_instance;
NvU32 interrupt_line;
@@ -376,9 +392,11 @@ typedef struct nv_state_t
NvS32 current_soc_irq;
NvU32 num_soc_irqs;
NvU32 hdacodec_irq;
NvU32 tcpc2disp_irq;
NvU8 *soc_dcb_blob;
NvU32 soc_dcb_size;
NvU32 disp_sw_soc_chip_id;
NvBool soc_is_dpalt_mode_supported;
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
NvU32 igpu_nonstall_irq;
@@ -430,9 +448,6 @@ typedef struct nv_state_t
/* Variable to force allocation of 32-bit addressable memory */
NvBool force_dma32_alloc;
/* Variable to track if device has entered dynamic power state */
NvBool dynamic_power_entered;
/* PCI power state should be D0 during system suspend */
NvBool d0_state_in_suspend;
@@ -457,6 +472,9 @@ typedef struct nv_state_t
/* Bool to check if ISO iommu enabled */
NvBool iso_iommu_present;
/* Bool to check if NISO iommu enabled */
NvBool niso_iommu_present;
/* Bool to check if dma-buf is supported */
NvBool dma_buf_supported;
@@ -465,6 +483,11 @@ typedef struct nv_state_t
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
NvBool nvpcf_dsm_in_gpu_scope;
/* Bool to check if the device received a shutdown notification */
NvBool is_shutdown;
/* Bool to check if the GPU has a coherent sysmem link */
NvBool coherent;
} nv_state_t;
// These define need to be in sync with defines in system.h
@@ -473,6 +496,10 @@ typedef struct nv_state_t
#define OS_TYPE_SUNOS 0x3
#define OS_TYPE_VMWARE 0x4
#define NVFP_TYPE_NONE 0x0
#define NVFP_TYPE_REFCOUNTED 0x1
#define NVFP_TYPE_REGISTERED 0x2
struct nv_file_private_t
{
NvHandle *handles;
@@ -482,12 +509,21 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;
//
// True if a client or an event was ever allocated on this fd.
// If false, RMAPI cleanup is skipped.
//
NvBool bCleanupRmapi;
};
// Forward define the gpu ops structures
typedef struct gpuSession *nvgpuSessionHandle_t;
typedef struct gpuDevice *nvgpuDeviceHandle_t;
typedef struct gpuAddressSpace *nvgpuAddressSpaceHandle_t;
typedef struct gpuTsg *nvgpuTsgHandle_t;
typedef struct UvmGpuTsgAllocParams_tag nvgpuTsgAllocParams_t;
typedef struct gpuChannel *nvgpuChannelHandle_t;
typedef struct UvmGpuChannelInfo_tag *nvgpuChannelInfo_t;
typedef struct UvmGpuChannelAllocParams_tag nvgpuChannelAllocParams_t;
@@ -513,8 +549,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
/*
* flags
@@ -566,12 +603,6 @@ typedef enum
NV_POWER_STATE_RUNNING
} nv_power_state_t;
typedef enum
{
NV_FIRMWARE_GSP,
NV_FIRMWARE_GSP_LOG
} nv_firmware_t;
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
@@ -584,15 +615,19 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
/*
* For console setup by EFI GOP, the base address is BAR1.
* For console setup by VBIOS, the base address is BAR2 + 16MB.
*/
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iso_iommu_present)
/*
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
* AC/DC event.
*/
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT 0x8002
#define NV_SOC_IS_NISO_IOMMU_PRESENT(nv) \
((nv)->niso_iommu_present)
/*
* GPU add/remove events
*/
@@ -604,8 +639,6 @@ typedef enum
* to core NVIDIA driver for ACPI events.
*/
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT 0
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC 0
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY 1
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED 0
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED 1
@@ -616,14 +649,18 @@ typedef enum
#define NV_EVAL_ACPI_METHOD_NVIF 0x01
#define NV_EVAL_ACPI_METHOD_WMMX 0x02
#define NV_I2C_CMD_READ 1
#define NV_I2C_CMD_WRITE 2
#define NV_I2C_CMD_SMBUS_READ 3
#define NV_I2C_CMD_SMBUS_WRITE 4
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
typedef enum {
NV_I2C_CMD_READ = 1,
NV_I2C_CMD_WRITE,
NV_I2C_CMD_SMBUS_READ,
NV_I2C_CMD_SMBUS_WRITE,
NV_I2C_CMD_SMBUS_QUICK_WRITE,
NV_I2C_CMD_SMBUS_QUICK_READ,
NV_I2C_CMD_SMBUS_BLOCK_READ,
NV_I2C_CMD_SMBUS_BLOCK_WRITE,
NV_I2C_CMD_BLOCK_READ,
NV_I2C_CMD_BLOCK_WRITE
} nv_i2c_cmd_t;
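With the defines folded into a typed enum, rm_i2c_transfer() now takes an nv_i2c_cmd_t. A hedged sketch of a one-byte SMBus read; the address/register parameter meanings are assumptions, and sp/nv/adapter are assumed to be set up elsewhere:
static NV_STATUS demo_smbus_read_byte(nvidia_stack_t *sp, nv_state_t *nv,
                                      void *adapter, NvU8 *value)
{
    return rm_i2c_transfer(sp, nv, adapter, NV_I2C_CMD_SMBUS_READ,
                           0x50 /* device address, assumed */,
                           0x00 /* register offset, assumed */,
                           1 /* length in bytes */, value);
}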
// Flags needed by OSAllocPagesNode
#define NV_ALLOC_PAGES_NODE_NONE 0x0
@@ -639,12 +676,15 @@ typedef enum
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
return ((nv->fb) && (nv->fb->size != 0) &&
(offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
@@ -652,6 +692,7 @@ static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
@@ -660,6 +701,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
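The term ((offset + (length - 1)) >= offset) added to each of these range checks rejects NvU64 wrap-around. A worked example:
NvU64 offset = 0xFFFFFFFFFFFFF000ULL;   /* near the top of the address space */
NvU64 length = 0x2000ULL;
/* offset + (length - 1) wraps to 0xFFF, which is below offset, so the
 * new term evaluates false and the bogus range is rejected; without it,
 * the wrapped sum could slip past the upper-bound comparison. */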
@@ -731,7 +773,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
@@ -793,13 +835,14 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
void NV_API_CALL nv_put_firmware(const void *);
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
void NV_API_CALL nv_put_file_private(void *);
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
@@ -834,10 +877,13 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
int NV_API_CALL nv_cap_drv_init(void);
void NV_API_CALL nv_cap_drv_exit(void);
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU16 *, NvU16 *, NvU16 *, NvU16 *, NvU64 *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;
@@ -888,6 +934,7 @@ NV_STATUS NV_API_CALL rm_ioctl (nvidia_stack_t *, nv_state_t *
NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *, NvU32 *);
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
@@ -906,6 +953,7 @@ NV_STATUS NV_API_CALL rm_write_registry_string (nvidia_stack_t *, nv_state_t *
void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *);
char* NV_API_CALL rm_remove_spaces (const char *);
char* NV_API_CALL rm_string_token (char **, const char);
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);
@@ -922,11 +970,13 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,
void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);
NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);
NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
@@ -937,19 +987,19 @@ NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, N
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU32, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *);
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, NvU64 *);
NV_STATUS NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64);
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);
@@ -975,24 +1025,29 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
NvBool NV_API_CALL rm_is_altstack_in_use(void);
/* vGPU VFIO specific functions */
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
NvU64 *, NvU64 *, NvU32 *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
#if defined(NV_VMWARE)
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
#endif
/* Callbacks should occur roughly every 10ms. */
#define NV_SNAPSHOT_TIMER_HZ 100
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
@@ -1004,6 +1059,16 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
}
/* nano second resolution timer callback structure */
typedef struct nv_nano_timer nv_nano_timer_t;
/* nano timer functions */
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
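The declarations imply a create/start/cancel/destroy lifecycle; a hedged sketch, assuming nv and pTmrEvent are supplied by the RM layer and that the timeout argument is in nanoseconds as the timens name suggests:
nv_nano_timer_t *t = NULL;
nv_create_nano_timer(nv, pTmrEvent, &t);
nv_start_nano_timer(nv, t, 1000000);   /* arm ~1 ms out */
/* ... the kernel layer invokes rm_run_nano_timer_callback() on expiry ... */
nv_cancel_nano_timer(nv, t);
nv_destroy_nano_timer(nv, t);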
#if defined(NVCPU_X86_64)
static inline NvU64 nv_rdtsc(void)

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -327,14 +327,18 @@ NV_STATUS nvUvmInterfaceGetPmaObject(uvmGpuDeviceHandle device,
// Mirrors pmaEvictPagesCb_t, see its documentation in pma.h.
typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
NvU32 pageSize,
NvU64 pageSize,
NvU64 *pPages,
NvU32 count,
NvU64 physBegin,
NvU64 physEnd);
NvU64 physEnd,
UVM_PMA_GPU_MEMORY_TYPE mem_type);
// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
NvU64 physBegin,
NvU64 physEnd,
UVM_PMA_GPU_MEMORY_TYPE mem_type);
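Both callback types gain a trailing UVM_PMA_GPU_MEMORY_TYPE and the page size widens to NvU64. A minimal conforming callback, shown only to illustrate the new signature:
static NV_STATUS demoEvictPages(void *callbackData,
                                NvU64 pageSize,
                                NvU64 *pPages,
                                NvU32 count,
                                NvU64 physBegin,
                                NvU64 physEnd,
                                UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
    /* A real implementation would migrate pages in [physBegin, physEnd)
     * and return the evicted addresses through pPages. */
    return NV_ERR_NO_MEMORY;   /* nothing evictable in this sketch */
}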
/*******************************************************************************
nvUvmInterfacePmaRegisterEvictionCallbacks
@@ -386,7 +390,7 @@ void nvUvmInterfacePmaUnregisterEvictionCallbacks(void *pPma);
*/
NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
NvLength pageCount,
NvU32 pageSize,
NvU64 pageSize,
UvmPmaAllocationOptions *pPmaAllocOptions,
NvU64 *pPages);
@@ -415,7 +419,7 @@ NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
NvU64 *pPages,
NvLength pageCount,
NvU32 pageSize,
NvU64 pageSize,
NvU32 flags);
/*******************************************************************************
@@ -443,7 +447,7 @@ NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
NV_STATUS nvUvmInterfacePmaUnpinPages(void *pPma,
NvU64 *pPages,
NvLength pageCount,
NvU32 pageSize);
NvU64 pageSize);
/*******************************************************************************
nvUvmInterfaceMemoryFree
@@ -484,7 +488,7 @@ void nvUvmInterfaceMemoryFree(uvmGpuAddressSpaceHandle vaSpace,
void nvUvmInterfacePmaFreePages(void *pPma,
NvU64 *pPages,
NvLength pageCount,
NvU32 pageSize,
NvU64 pageSize,
NvU32 flags);
/*******************************************************************************
@@ -503,7 +507,7 @@ void nvUvmInterfacePmaFreePages(void *pPma,
NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
UvmGpuPointer gpuPointer,
NvLength length, void **cpuPtr,
NvU32 pageSize);
NvU64 pageSize);
/*******************************************************************************
uvmGpuMemoryCpuUnmap
@@ -513,16 +517,59 @@ NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
void *cpuPtr);
/*******************************************************************************
nvUvmInterfaceTsgAllocate
This function allocates a Time-Slice Group (TSG).
allocParams must contain an engineIndex as TSGs need to be bound to an
engine type at allocation time. The possible values are [0,
UVM_COPY_ENGINE_COUNT_MAX) for the CE engine type. Notably, only the copy engines
that have UvmGpuCopyEngineCaps::supported set to true can be allocated.
Note that TSG is not supported on all GPU architectures for all engine
types, e.g., pre-Volta GPUs only support TSG for the GR/Compute engine type.
On devices that do not support HW TSGs on the requested engine, this API is
still required, i.e., a TSG handle is required in
nvUvmInterfaceChannelAllocate(), due to information stored in it necessary
for channel allocation. However, when HW TSGs aren't supported, a TSG handle
is essentially a "fake" TSG with no HW scheduling impact.
tsg is filled with the address of the corresponding TSG handle.
Arguments:
vaSpace[IN] - VA space linked to a client and a device under which
the TSG is allocated.
allocParams[IN] - structure with allocation settings.
tsg[OUT] - pointer to the new TSG handle.
Error codes:
NV_ERR_GENERIC
NV_ERR_INVALID_ARGUMENT
NV_ERR_NO_MEMORY
NV_ERR_NOT_SUPPORTED
*/
NV_STATUS nvUvmInterfaceTsgAllocate(uvmGpuAddressSpaceHandle vaSpace,
const UvmGpuTsgAllocParams *allocParams,
uvmGpuTsgHandle *tsg);
/*******************************************************************************
nvUvmInterfaceTsgDestroy
This function destroys a given TSG.
Arguments:
tsg[IN] - Tsg handle
*/
void nvUvmInterfaceTsgDestroy(uvmGpuTsgHandle tsg);
/*******************************************************************************
nvUvmInterfaceChannelAllocate
This function will allocate a channel bound to a copy engine
This function will allocate a channel bound to a copy engine(CE) or a SEC2
engine.
allocParams must contain an engineIndex as channels need to be bound to an
engine type at allocation time. The possible values are [0,
UVM_COPY_ENGINE_COUNT_MAX), but notably only the copy engines that have
UvmGpuCopyEngineCaps::supported set to true can be allocated. This struct
also contains information relative to GPFIFO and GPPut.
allocParams contains information relative to GPFIFO and GPPut.
channel is filled with the address of the corresponding channel handle.
@@ -532,17 +579,18 @@ void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
Host channel submission doorbell.
Arguments:
vaSpace[IN] - VA space linked to a client and a device under which
the channel will be allocated
tsg[IN] - Time-Slice Group that the channel will be a member of.
allocParams[IN] - structure with allocation settings
channel[OUT] - pointer to the new channel handle
channelInfo[OUT] - structure filled with channel information
Error codes:
NV_ERR_GENERIC
NV_ERR_INVALID_ARGUMENT
NV_ERR_NO_MEMORY
NV_ERR_NOT_SUPPORTED
*/
NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
NV_STATUS nvUvmInterfaceChannelAllocate(const uvmGpuTsgHandle tsg,
const UvmGpuChannelAllocParams *allocParams,
uvmGpuChannelHandle *channel,
UvmGpuChannelInfo *channelInfo);
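Channel allocation is now anchored to a TSG instead of a VA space. A hedged sketch of the resulting flow (vaSpace assumed valid; field values illustrative; error handling elided):
uvmGpuTsgHandle tsg;
uvmGpuChannelHandle channel;
UvmGpuChannelInfo channelInfo;
UvmGpuTsgAllocParams tsgParams = {
    .engineType  = UVM_GPU_CHANNEL_ENGINE_TYPE_CE,
    .engineIndex = 0,
};
UvmGpuChannelAllocParams chParams = { .numGpFifoEntries = 128 };
nvUvmInterfaceTsgAllocate(vaSpace, &tsgParams, &tsg);
nvUvmInterfaceChannelAllocate(tsg, &chParams, &channel, &channelInfo);
/* ... submit work ... */
nvUvmInterfaceChannelDestroy(channel);
nvUvmInterfaceTsgDestroy(tsg);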
@@ -550,7 +598,7 @@ NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
/*******************************************************************************
nvUvmInterfaceChannelDestroy
This function destroys a given channel
This function destroys a given channel.
Arguments:
channel[IN] - channel handle
@@ -571,7 +619,7 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
NV_ERR_NO_MEMORY
*/
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
UvmGpuCaps * caps);
UvmGpuCaps *caps);
/*******************************************************************************
nvUvmInterfaceQueryCopyEnginesCaps
@@ -917,6 +965,23 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
void *pFaultBuffer,
NvU32 *numFaults);
/*******************************************************************************
nvUvmInterfaceFlushReplayableFaultBuffer
This function sends an RPC to GSP in order to flush the HW replayable fault buffer.
NOTES:
- This function DOES NOT acquire the RM API or GPU locks. That is because
it is called during fault servicing, which could produce deadlocks.
Arguments:
device[IN] - Device handle associated with the gpu
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo
@@ -925,13 +990,15 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
Arguments:
device[IN] - Device handle associated with the gpu
pAccessCntrInfo[OUT] - Information provided by RM for access counter handling
accessCntrIndex[IN] - Access counter index
Error codes:
NV_ERR_GENERIC
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceInitAccessCntrInfo(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo);
UvmGpuAccessCntrInfo *pAccessCntrInfo,
NvU32 accessCntrIndex);
/*******************************************************************************
nvUvmInterfaceDestroyAccessCntrInfo
@@ -1050,11 +1117,13 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
hMemory[IN] - Memory handle.
offset [IN] - Offset from the beginning of the allocation
where PTE mappings should begin.
Should be aligned with pagesize associated
Should be aligned with mappingPagesize
in gpuExternalMappingInfo associated
with the allocation.
size [IN] - Length of the allocation for which PTEs
should be built.
Should be aligned with pagesize associated
Should be aligned with mappingPagesize
in gpuExternalMappingInfo associated
with the allocation.
size = 0 will be interpreted as the total size
of the allocation.
@@ -1360,8 +1429,6 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
a. pre-allocated stack
b. the fact that internal RPC infrastructure doesn't acquire GPU lock.
Therefore, locking is the caller's responsibility.
- This function DOES NOT sleep (does not allocate memory or acquire locks)
so it can be invoked while holding a spinlock.
Arguments:
channel[IN] - paging channel handle obtained via
@@ -1381,4 +1448,243 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
char *methodStream,
NvU32 methodStreamSize);
/*******************************************************************************
CSL Interface and Locking
The following functions do not acquire the RM API or GPU locks and must not be called
concurrently with the same UvmCslContext parameter in different threads. The caller must
guarantee this exclusion.
* nvUvmInterfaceCslRotateIv
* nvUvmInterfaceCslEncrypt
* nvUvmInterfaceCslDecrypt
* nvUvmInterfaceCslSign
* nvUvmInterfaceCslQueryMessagePool
* nvUvmInterfaceCslIncrementIv
*/
/*******************************************************************************
nvUvmInterfaceCslInitContext
Allocates and initializes a CSL context for a given secure channel.
The lifetime of the context is the same as the lifetime of the secure channel
it is paired with.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
channel[IN] - Handle to a secure channel.
Error codes:
NV_ERR_INVALID_STATE - The system is not operating in Confidential Compute mode.
NV_ERR_INVALID_CHANNEL - The associated channel is not a secure channel.
NV_ERR_IN_USE - The context has already been initialized.
*/
NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
uvmGpuChannelHandle channel);
/*******************************************************************************
nvUvmInterfaceDeinitCslContext
Securely deinitializes and clears the contents of a context.
If the context is already deinitialized, the function returns immediately.
Arguments:
uvmCslContext[IN] - The CSL context.
*/
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslRotateIv
Rotates the IV for a given channel and operation.
This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
to overflow.
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
*/
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation);
/*******************************************************************************
nvUvmInterfaceCslEncrypt
Encrypts data and produces an authentication tag.
Auth, input, and output buffers must not overlap. If they do then calling
this function produces undefined behavior. Performance is typically
maximized when the input and output buffers are 16-byte aligned. This is
the natural alignment for an AES block.
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
However, it is optional. If it is NULL, the next IV in line will be used.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[IN] - Size of the input and output buffers in
units of bytes. Value can range from 1 byte
to (2^32) - 1 bytes.
inputBuffer[IN] - Address of plaintext input buffer.
encryptIv[IN/OUT] - IV to use for encryption. Can be NULL.
outputBuffer[OUT] - Address of ciphertext output buffer.
authTagBuffer[OUT] - Address of authentication tag buffer.
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
Error codes:
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
- The encryptIv has already been used.
*/
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv *encryptIv,
NvU8 *outputBuffer,
NvU8 *authTagBuffer);
/*******************************************************************************
nvUvmInterfaceCslDecrypt
Verifies the authentication tag and decrypts data.
Auth, input, and output buffers must not overlap. If they do then calling
this function produces undefined behavior. Performance is typically
maximized when the input and output buffers are 16-byte aligned. This is
the natural alignment for an AES block.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[IN] - Size of the input and output buffers in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to
calculate the authentication tag. Can be NULL.
addAuthDataSize[IN] - Size of the additional authenticated data in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
This parameter is ignored if addAuthData is NULL.
authTagBuffer[IN] - Address of authentication tag buffer.
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The decryption operation would cause a
counter overflow to occur.
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
NV_ERR_INVALID_DATA - Verification of the authentication tag fails.
*/
NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
/*******************************************************************************
nvUvmInterfaceCslSign
Generates an authentication tag for secure work launch.
Auth and input buffers must not overlap. If they do then calling this function produces
undefined behavior.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[IN] - Size of the input buffer in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
inputBuffer[IN] - Address of plaintext input buffer.
authTagBuffer[OUT] - Address of authentication tag buffer.
Its size is UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The signing operation would cause a counter overflow to occur.
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
*/
NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *authTagBuffer);
/*******************************************************************************
nvUvmInterfaceCslQueryMessagePool
Returns the number of messages that can be encrypted before the message counter will overflow.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
operation[IN] - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
messageNum[OUT] - Number of messages left before overflow.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
*/
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 *messageNum);
/*******************************************************************************
nvUvmInterfaceCslIncrementIv
Increments the message counter by the specified amount.
If iv is non-NULL then the incremented value is returned.
If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
the returned IV can be used in nvUvmInterfaceCslDecrypt.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
increment[IN] - The amount by which the IV is incremented. Can be 0.
iv[OUT] - If non-NULL, a buffer to store the incremented IV.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
in an overflow.
*/
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv);
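A hedged sketch tying the CSL calls together, assuming an initialized context ctx; all calls on one context must be serialized by the caller, as noted in "CSL Interface and Locking" above. The gpuCipher/gpuTag buffers stand in for a message produced by the GPU elsewhere:
NvU8 plain[256], cipher[256], decrypted[256], gpuCipher[256];
NvU8 tag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES];
NvU8 gpuTag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES];
UvmCslIv iv;
/* Encrypt direction: reserve the next IV explicitly, then encrypt. */
nvUvmInterfaceCslIncrementIv(ctx, UVM_CSL_OPERATION_ENCRYPT, 1, &iv);
nvUvmInterfaceCslEncrypt(ctx, sizeof(plain), plain, &iv, cipher, tag);
/* Decrypt direction: passing NULL for the IV uses the context's internal
 * decrypt counter. */
nvUvmInterfaceCslDecrypt(ctx, sizeof(decrypted), gpuCipher, NULL, decrypted,
                         NULL, 0, gpuTag);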
#endif // _NV_UVM_INTERFACE_H_

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -92,6 +92,7 @@ typedef unsigned long long UvmGpuPointer;
typedef struct uvmGpuSession_tag *uvmGpuSessionHandle; // gpuSessionHandle
typedef struct uvmGpuDevice_tag *uvmGpuDeviceHandle; // gpuDeviceHandle
typedef struct uvmGpuAddressSpace_tag *uvmGpuAddressSpaceHandle; // gpuAddressSpaceHandle
typedef struct uvmGpuTsg_tag *uvmGpuTsgHandle; // gpuTsgHandle
typedef struct uvmGpuChannel_tag *uvmGpuChannelHandle; // gpuChannelHandle
typedef struct uvmGpuCopyEngine_tag *uvmGpuCopyEngineHandle; // gpuObjectHandle
@@ -110,7 +111,7 @@ typedef struct UvmGpuMemoryInfo_tag
NvBool deviceDescendant;
// Out: Page size associated with the phys alloc.
NvU32 pageSize;
NvU64 pageSize;
// Out: Set to TRUE, if the allocation is contiguous.
NvBool contig;
@@ -280,6 +281,16 @@ typedef struct UvmGpuChannelInfo_tag
// to kick off the new work.
//
volatile NvU32 *pWorkSubmissionToken;
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
NvU64 workSubmissionOffsetGpuVa;
} UvmGpuChannelInfo;
typedef enum
@@ -292,6 +303,17 @@ typedef enum
UVM_BUFFER_LOCATION_VID = 2,
} UVM_BUFFER_LOCATION;
typedef struct UvmGpuTsgAllocParams_tag
{
// Interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
NvU32 engineType;
// Index of the engine the TSG is bound to.
// Ignored if engineType is anything other than
// UVM_GPU_CHANNEL_ENGINE_TYPE_CE.
NvU32 engineIndex;
} UvmGpuTsgAllocParams;
typedef struct UvmGpuChannelAllocParams_tag
{
NvU32 numGpFifoEntries;
@@ -299,13 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
// The next two fields store UVM_BUFFER_LOCATION values
NvU32 gpFifoLoc;
NvU32 gpPutLoc;
// Index of the engine the channel will be bound to
// ignored if engineType is anything other than UVM_GPU_CHANNEL_ENGINE_TYPE_CE
NvU32 engineIndex;
// interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
NvU32 engineType;
} UvmGpuChannelAllocParams;
typedef struct UvmGpuPagingChannelAllocParams_tag
@@ -376,40 +391,16 @@ typedef enum
typedef struct UvmGpuCaps_tag
{
NvU32 sysmemLink; // UVM_LINK_TYPE
NvU32 sysmemLinkRateMBps; // See UvmGpuP2PCapsParams::totalLinkLineRateMBps
// If numaEnabled is NV_TRUE, then the system address of allocated GPU
// memory can be converted to struct pages. See
// UvmGpuInfo::systemMemoryWindowStart.
NvBool numaEnabled;
NvU32 numaNodeId;
// On ATS systems, GPUs connected to different CPU sockets can have peer
// traffic. They are called indirect peers. However, indirect peers are
// mapped using sysmem aperture. In order to disambiguate the location of a
// specific memory address, each GPU maps its memory to a different window
// in the System Physical Address (SPA) space. The following fields contain
// the base + size of such window for the GPU. systemMemoryWindowSize
// different than 0 indicates that the window is valid.
//
// - If the window is valid, then we can map GPU memory to the CPU as
// cache-coherent by adding the GPU address to the window start.
// - If numaEnabled is NV_TRUE, then we can also convert the system
// addresses of allocated GPU memory to struct pages.
//
// TODO: Bug 1986868: fix window start computation for SIMICS
NvU64 systemMemoryWindowStart;
NvU64 systemMemoryWindowSize;
// This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
// all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
// nvswitchMemoryWindowStart tells the base address for the GPU in the
// NVSwitch address space. It is used when creating PTEs of memory mappings
// to NVSwitch peers.
NvBool connectedToSwitch;
NvU64 nvswitchMemoryWindowStart;
} UvmGpuCaps;
typedef struct UvmGpuAddressSpaceInfo_tag
{
NvU32 bigPageSize;
NvU64 bigPageSize;
NvBool atsEnabled;
@@ -430,12 +421,14 @@ typedef struct UvmGpuAddressSpaceInfo_tag
typedef struct UvmGpuAllocInfo_tag
{
NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested
NvU32 pageSize; // default is RM big page size - 64K or 128K, else use 4K or 2M
NvU64 pageSize; // default is RM big page size - 64K or 128K, else use 4K or 2M
NvU64 alignment; // Virtual alignment
NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
NvBool bPersistentVidmem; // Causes RM to allocate persistent video memory
NvHandle hPhysHandle; // Handle for phys allocation either provided or retrieved
NvBool bUnprotected; // Allocation to be made in unprotected memory whenever
// SEV or GPU CC modes are enabled. Ignored otherwise
} UvmGpuAllocInfo;
typedef enum
@@ -516,6 +509,13 @@ typedef struct UvmGpuExternalMappingInfo_tag
// In: Size of the buffer to store PTEs (in bytes).
NvU64 pteBufferSize;
// In: Page size for mapping
// If this field is passed as 0, the page size
// of the allocation is used for mapping.
// nvUvmInterfaceGetExternalAllocPtes must pass
// this field as zero.
NvU64 mappingPageSize;
// In: Pointer to a buffer to store PTEs.
// Out: The interface will fill the buffer with PTEs
NvU64 *pteBuffer;
@@ -566,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
// Out: ATS (Address Translation Services) is supported
NvBool atsSupported;
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
NvBool sevEnabled;
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
// is enabled in the VM, indicating that Confidential Computing must be
// also enabled in the GPU(s); these two security features are either both
// enabled, or both disabled.
NvBool confComputingEnabled;
} UvmPlatformInfo;
typedef struct UvmGpuClientInfo_tag
@@ -577,6 +580,20 @@ typedef struct UvmGpuClientInfo_tag
NvHandle hSmcPartRef;
} UvmGpuClientInfo;
typedef enum
{
UVM_GPU_CONF_COMPUTE_MODE_NONE,
UVM_GPU_CONF_COMPUTE_MODE_APM,
UVM_GPU_CONF_COMPUTE_MODE_HCC,
UVM_GPU_CONF_COMPUTE_MODE_COUNT
} UvmGpuConfComputeMode;
typedef struct UvmGpuConfComputeCaps_tag
{
// Out: GPU's confidential compute mode
UvmGpuConfComputeMode mode;
} UvmGpuConfComputeCaps;
#define UVM_GPU_NAME_LENGTH 0x40
typedef struct UvmGpuInfo_tag
@@ -641,6 +658,31 @@ typedef struct UvmGpuInfo_tag
UvmGpuClientInfo smcUserClientInfo;
// Confidential Compute capabilities of this GPU
UvmGpuConfComputeCaps gpuConfComputeCaps;
// UVM_LINK_TYPE
NvU32 sysmemLink;
// See UvmGpuP2PCapsParams::totalLinkLineRateMBps
NvU32 sysmemLinkRateMBps;
// On coherent systems each GPU maps its memory to a window in the System
// Physical Address (SPA) space. The following fields describe that window.
//
// systemMemoryWindowSize > 0 indicates that the window is valid, meaning
// that GPU memory can be mapped by the CPU as cache-coherent by adding the
// GPU address to the window start.
NvU64 systemMemoryWindowStart;
NvU64 systemMemoryWindowSize;
// This indicates whether the GPU is connected to NVSwitch. On systems with
// NVSwitch, all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
// nvswitchMemoryWindowStart holds the base address for the GPU in the
// NVSwitch address space. It is used when creating PTEs for memory mappings
// to NVSwitch peers.
NvBool connectedToSwitch;
NvU64 nvswitchMemoryWindowStart;
} UvmGpuInfo;
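/*
 * To make the coherent-window semantics above concrete: a minimal sketch
 * that assumes only the fields documented in UvmGpuInfo. gpuPaToSpa() is a
 * hypothetical helper added for illustration; it is not an RM or UVM API.
 */
static NvBool gpuPaToSpa(const UvmGpuInfo *info, NvU64 gpuPa, NvU64 *spa)
{
// A zero systemMemoryWindowSize means the window is invalid, i.e. GPU
// memory cannot be mapped cache-coherently by the CPU on this system.
if (info->systemMemoryWindowSize == 0 ||
gpuPa >= info->systemMemoryWindowSize)
return NV_FALSE;
// Per the comment above: GPU address + window start yields the System
// Physical Address of a cache-coherent CPU mapping.
*spa = info->systemMemoryWindowStart + gpuPa;
return NV_TRUE;
}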
typedef struct UvmGpuFbInfo_tag
@@ -683,6 +725,9 @@ typedef struct UvmPmaStatistics_tag
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
} UvmPmaStatistics;
/*******************************************************************************
@@ -790,24 +835,92 @@ struct UvmOpsUvmEvents
#endif
};
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
#define UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES 16
typedef union UvmFaultMetadataPacket_tag
{
struct {
NvU8 authTag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES];
NvBool valid;
};
// padding to 32 bytes
NvU8 _padding[32];
} UvmFaultMetadataPacket;
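// Because the union above pads the packet to a fixed 32 bytes, a cheap
// compile-time guard against layout drift is possible. Illustrative only
// (assumes a C11 toolchain); this check is not part of the original header.
_Static_assert(sizeof(UvmFaultMetadataPacket) == 32,
"UvmFaultMetadataPacket must remain exactly 32 bytes");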
// This struct shall not be accessed nor modified directly by UVM as it is
// entirely managed by the RM layer
typedef struct UvmCslContext_tag
{
struct ccslContext_t *ctx;
void *nvidia_stack;
} UvmCslContext;
typedef struct UvmGpuFaultInfo_tag
{
struct
{
// Register mappings obtained from RM
// Fault buffer GET register mapping.
//
// When Confidential Computing is enabled, GET refers to the shadow
// buffer (see bufferAddress below), and not to the actual HW buffer.
// In this setup, writes of GET (by UVM) do not result in re-evaluation
// of any interrupt condition.
volatile NvU32* pFaultBufferGet;
// Fault buffer PUT register mapping.
//
// When Confidential Computing is enabled, PUT refers to the shadow
// buffer (see bufferAddress below), and not to the actual HW buffer.
// In this setup, writes of PUT (by GSP-RM) do not result in
// re-evaluation of any interrupt condition.
volatile NvU32* pFaultBufferPut;
// Note: this variable is deprecated, since buffer overflow is no longer a
// separate register on future chips.
volatile NvU32* pFaultBufferInfo;
// Register mapping used to clear a replayable fault interrupt in
// Turing+ GPUs.
volatile NvU32* pPmcIntr;
// Register mapping used to enable replayable fault interrupts.
volatile NvU32* pPmcIntrEnSet;
// Register mapping used to disable replayable fault interrupts.
volatile NvU32* pPmcIntrEnClear;
// Register used to enable, or disable, faults on prefetches.
volatile NvU32* pPrefetchCtrl;
// Replayable fault interrupt mask identifier.
NvU32 replayableFaultMask;
// Fault buffer CPU mapping
void* bufferAddress;
//
// When Confidential Computing is disabled, the mapping points to the
// actual HW fault buffer.
//
// When Confidential Computing is enabled, the mapping points to a
// copy of the HW fault buffer. This "shadow buffer" is maintained
// by GSP-RM.
// Size, in bytes, of the fault buffer pointed to by bufferAddress.
NvU32 bufferSize;
// Mapping pointing to the start of the fault buffer metadata containing
// a 16-byte authentication tag and a valid byte. Always NULL when
// Confidential Computing is disabled.
UvmFaultMetadataPacket *bufferMetadata;
// CSL context used for performing decryption of replayable faults when
// Confidential Computing is enabled.
UvmCslContext cslCtx;
// Indicates whether UVM owns the replayable fault buffer.
// The value of this field is always NV_TRUE when Confidential Computing
// is disabled.
NvBool bUvmOwnsHwFaultBuffer;
} replayable;
struct
{
@@ -826,10 +939,24 @@ typedef struct UvmGpuFaultInfo_tag
// Preallocated stack for functions called from the UVM isr bottom half
void *isr_bh_sp;
// Used only when Hopper Confidential Compute is enabled
// Register mappings obtained from RM
volatile NvU32* pFaultBufferPut;
// Used only when Hopper Confidential Compute is enabled
// Cached get index of the non-replayable shadow buffer
NvU32 shadowBufferGet;
// See replayable.bufferMetadata
UvmFaultMetadataPacket *shadowBufferMetadata;
} nonReplayable;
NvHandle faultBufferHandle;
struct Device *pDevice;
} UvmGpuFaultInfo;
struct Device;
typedef struct UvmGpuPagingChannel_tag
{
struct gpuDevice *device;
@@ -837,6 +964,7 @@ typedef struct UvmGpuPagingChannel_tag
NvHandle channelHandle;
NvHandle errorNotifierHandle;
void *pushStreamSp;
struct Device *pDevice;
} UvmGpuPagingChannel, *UvmGpuPagingChannelHandle;
typedef struct UvmGpuAccessCntrInfo_tag
@@ -860,12 +988,6 @@ typedef struct UvmGpuAccessCntrInfo_tag
void* bufferAddress;
NvU32 bufferSize;
NvHandle accessCntrBufferHandle;
// The Notification address in the access counter notification msg does not
// contain the correct upper bits 63-47 for GPA-based notifications. RM
// provides us with the correct offset to be added.
// See Bug 1803015
NvU64 baseDmaSysmemAddr;
} UvmGpuAccessCntrInfo;
typedef enum
@@ -897,7 +1019,18 @@ typedef struct UvmGpuAccessCntrConfig_tag
NvU32 threshold;
} UvmGpuAccessCntrConfig;
//
// When modifying this enum, make sure they are compatible with the mirrored
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
//
typedef enum UvmPmaGpuMemoryType_tag
{
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
} UVM_PMA_GPU_MEMORY_TYPE;
typedef UvmGpuChannelInfo gpuChannelInfo;
typedef UvmGpuTsgAllocParams gpuTsgAllocParams;
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
typedef UvmGpuCaps gpuCaps;
typedef UvmGpuCopyEngineCaps gpuCeCaps;
@@ -922,4 +1055,16 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
typedef struct UvmCslIv
{
NvU8 iv[12];
NvU8 fresh;
} UvmCslIv;
typedef enum UvmCslOperation
{
UVM_CSL_OPERATION_ENCRYPT,
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
#endif // _NV_UVM_TYPES_H_

View File

@@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|* *|
\***************************************************************************/
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
typedef NvU64 NvOffset; /* GPU address */
#endif
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,6 +29,7 @@
#include <nvlimits.h>
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
#define NVKMS_LEFT 0
#define NVKMS_RIGHT 1
@@ -530,4 +531,78 @@ typedef struct {
NvBool noncoherent;
} NvKmsDispIOCoherencyModes;
enum NvKmsInputColorSpace {
/* Unknown colorspace; no de-gamma will be applied */
NVKMS_INPUT_COLORSPACE_NONE = 0,
/* Linear, Rec.709 [-0.5, 7.5) */
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
/* PQ, Rec.2020 unity */
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
};
enum NvKmsOutputTf {
/*
* NVKMS itself won't apply any OETF (clients are still
* free to provide a custom OLUT)
*/
NVKMS_OUTPUT_TF_NONE = 0,
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
NVKMS_OUTPUT_TF_PQ = 2,
};
/*!
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
* This is expected to match exactly with the spec.
*/
struct NvKmsHDRStaticMetadata {
/*!
* Color primaries of the data.
* These are coded as unsigned 16-bit values in units of 0.00002,
* where 0x0000 represents zero and 0xC350 represents 1.0000.
*/
struct {
NvU16 x, y;
} displayPrimaries[3];
/*!
* White point of colorspace data.
* These are coded as unsigned 16-bit values in units of 0.00002,
* where 0x0000 represents zero and 0xC350 represents 1.0000.
*/
struct {
NvU16 x, y;
} whitePoint;
/*!
* Maximum mastering display luminance.
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
*/
NvU16 maxDisplayMasteringLuminance;
/*!
* Minimum mastering display luminance.
* This value is coded as an unsigned 16-bit value in units of
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
* represents 6.5535 cd/m2.
*/
NvU16 minDisplayMasteringLuminance;
/*!
* Maximum content light level.
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
*/
NvU16 maxCLL;
/*!
* Maximum frame-average light level.
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
*/
NvU16 maxFALL;
};
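/*
 * Illustrative encoding of the fixed-point chromaticity fields above;
 * nvKmsEncodeChromaticity() is a hypothetical helper, not an NVKMS API.
 * One LSB is 0.00002, so 1.0000 encodes as 50000 (0xC350); the coordinate
 * is taken in millionths here to keep the math integer-only.
 */
static inline NvU16 nvKmsEncodeChromaticity(NvU32 coordMicro)
{
if (coordMicro > 1000000) {
coordMicro = 1000000; /* clamp to 1.0 */
}
/* 20 millionths per LSB, rounded to nearest. */
return (NvU16)((coordMicro + 10) / 20);
}
/* Example: D65 white point x = 0.3127 -> nvKmsEncodeChromaticity(312700) == 15635. */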
#endif /* NVKMS_API_TYPES_H */

View File

@@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
};
typedef struct NvKmsSurfaceMemoryFormatInfo {

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
} caps;
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
};
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -164,8 +165,6 @@ struct NvKmsKapiConnectorInfo {
NvU32 physicalIndex;
NvU32 headMask;
NvKmsConnectorSignalFormat signalFormat;
NvKmsConnectorType type;
@@ -193,6 +192,7 @@ struct NvKmsKapiStaticDisplayInfo {
NvU32 numPossibleClones;
NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
NvU32 headMask;
};
struct NvKmsKapiSyncpt {
@@ -218,6 +218,11 @@ struct NvKmsKapiLayerConfig {
struct NvKmsRRParams rrParams;
struct NvKmsKapiSyncpt syncptParams;
struct NvKmsHDRStaticMetadata hdrMetadata;
NvBool hdrMetadataSpecified;
enum NvKmsOutputTf tf;
NvU8 minPresentInterval;
NvBool tearing;
@@ -226,6 +231,8 @@ struct NvKmsKapiLayerConfig {
NvS16 dstX, dstY;
NvU16 dstWidth, dstHeight;
enum NvKmsInputColorSpace inputColorSpace;
};
struct NvKmsKapiLayerRequestedConfig {
@@ -277,6 +284,8 @@ struct NvKmsKapiHeadModeSetConfig {
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
struct NvKmsKapiDisplayMode mode;
NvBool vrrEnabled;
};
struct NvKmsKapiHeadRequestedConfig {
@@ -368,6 +377,9 @@ struct NvKmsKapiDynamicDisplayParams {
/* [OUT] Connection status */
NvU32 connected;
/* [OUT] VRR status */
NvBool vrrSupported;
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
struct {
NvU16 bufferSize;
@@ -484,6 +496,47 @@ struct NvKmsKapiFunctionsTable {
*/
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
/*!
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
* display) is currently supported.
*
* \param [in] fd fd from opening /dev/nvidia-modeset.
*
* \param [in] device A device returned by allocateDevice().
*
* \param [in] head head of display.
*
* \param [in] display The display to grant.
*
* \return NV_TRUE on success, NV_FALSE on failure.
*/
NvBool (*grantPermissions)
(
NvS32 fd,
struct NvKmsKapiDevice *device,
NvU32 head,
NvKmsKapiDisplay display
);
/*!
* Revoke permissions previously granted. Only one (dispIndex, head,
* display) is currently supported.
*
* \param [in] device A device returned by allocateDevice().
*
* \param [in] head head of display.
*
* \param [in] display The display to revoke.
*
* \return NV_TRUE on success, NV_FALSE on failure.
*/
NvBool (*revokePermissions)
(
struct NvKmsKapiDevice *device,
NvU32 head,
NvKmsKapiDisplay display
);
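/*!
 * A hedged usage sketch of the two entry points above; 'nvKms' stands for
 * a populated NvKmsKapiFunctionsTable, and fd/device/head/display are
 * assumed to come from the caller:
 *
 * if (nvKms->grantPermissions(fd, device, head, display)) {
 * // fd may now mode-set this (head, display) pair ...
 * nvKms->revokePermissions(device, head, display);
 * }
 */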
/*!
* Registers for notification, via
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified
@@ -1020,6 +1073,21 @@ struct NvKmsKapiFunctionsTable {
NvU64 *pPages
);
/*!
* Check if this memory object can be scanned out for display.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] memory The memory object to check for display support.
*
* \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
*/
NvBool (*isMemoryValidForDisplay)
(
const struct NvKmsKapiDevice *device,
const struct NvKmsKapiMemory *memory
);
/*
* Import SGT as a memory handle.
*

View File

@@ -25,7 +25,7 @@
//
// This file was generated with FINN, an NVIDIA coding tool.
// Source file: nvlimits.finn
//

View File

@@ -234,12 +234,14 @@ extern "C" {
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
#else
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
#endif
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
@@ -249,12 +251,12 @@ extern "C" {
#define DRF_EXTENT(drf) (1?drf) // much better
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
#endif
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
#endif
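//
// Usage sketch for the DRF accessors above. NV_PEXAMPLE_* is a made-up
// register field in the extent:base form that DRF_BASE/DRF_EXTENT parse
// via the ternary trick; it is not a real register definition.
//
#define NV_PEXAMPLE_CTRL_MODE 3:0
#define NV_PEXAMPLE_CTRL_MODE_FAST 0x00000002
static NV_FORCEINLINE NvU32 nvDrfExample(void)
{
NvU32 reg = DRF_DEF(_PEXAMPLE, _CTRL, _MODE, _FAST); // 0x2 << shift 0
return DRF_VAL(_PEXAMPLE, _CTRL, _MODE, reg); // reads back 0x2
}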
@@ -692,6 +694,42 @@ nvPrevPow2_U64(const NvU64 x )
} \
}
//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
* Returns the position of nth set bit in the given mask.
*
* Returns -1 if mask has fewer than n bits set.
*
// n is 0-indexed and has valid values 0..31 inclusive, so the "zeroth" set bit is
* the first set LSB.
*
// For example, if mask = 0x000000F0u and n = 1, the return value will be 5.
// For example, if mask = 0x000000F0u and n = 4, the return value will be -1.
*/
static NV_FORCEINLINE NvS32
nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
{
NvU32 seenSetBitsCount = 0;
NvS32 index;
FOR_EACH_INDEX_IN_MASK(32, index, mask)
{
if (seenSetBitsCount == n)
{
return index;
}
++seenSetBitsCount;
}
FOR_EACH_INDEX_IN_MASK_END;
return -1;
}
#endif // NVDEC_1_0
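// The examples in the comment above, as runnable checks (illustrative only;
// nvAssert is the assertion macro used elsewhere in this header):
static NV_FORCEINLINE void nvGetNthSetBitIndex32SelfTest(void)
{
nvAssert(nvGetNthSetBitIndex32(0x000000F0u, 1) == 5); // second set bit
nvAssert(nvGetNthSetBitIndex32(0x000000F0u, 4) == -1); // only 4 bits set
}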
//
// Size to use when declaring variable-sized arrays
//
@@ -907,6 +945,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
return uAddr.p;
}
// Get bit at pos (k) from x
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
//
// Set the bit at pos (b) for U64 which is < 128. Since the (b) can be >= 64, we need 2 U64 to store this.
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
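//
// Brief usage sketch of the 128-bit helpers above (illustrative only):
//
// NvU64 lo = 0, hi = 0;
// NV_BIT_SET_128(5, lo, hi); // sets bit 5 in lo
// NV_BIT_SET_128(70, lo, hi); // sets bit (70 - 64) = 6 in hi
// NV_BIT_GET_128(5, lo, hi) == 1 and NV_BIT_GET_128(70, lo, hi) == 1
//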
#ifdef __cplusplus
}
#endif //__cplusplus

View File

@@ -24,11 +24,6 @@
#ifndef SDK_NVSTATUS_H
#define SDK_NVSTATUS_H
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
/* Rather than #ifdef out every such include in every sdk */
/* file, punt here. */
#if !defined(XAPIGEN) /* rest of file */
#ifdef __cplusplus
extern "C" {
#endif
@@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
}
#endif
#endif // XAPIGEN
#endif /* SDK_NVSTATUS_H */

View File

@@ -24,11 +24,6 @@
#ifndef SDK_NVSTATUSCODES_H
#define SDK_NVSTATUSCODES_H
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
/* Rather than #ifdef out every such include in every sdk */
/* file, punt here. */
#if !defined(XAPIGEN) /* rest of file */
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
@@ -153,6 +148,8 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
@@ -164,6 +161,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
#endif // XAPIGEN
#endif /* SDK_NVSTATUSCODES_H */

View File

@@ -513,6 +513,12 @@ typedef struct
// place to re-locate these from nvos.h which cannot be included by a number
// of builds that need them
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
#define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
#else
#define NV_ATTRIBUTE_UNUSED
#endif
#if defined(_MSC_VER)
#if _MSC_VER >= 1310
@@ -536,8 +542,6 @@ typedef struct
#define NV_FORCERESULTCHECK
#define NV_ATTRIBUTE_UNUSED
#define NV_FORMAT_PRINTF(_f, _a)
#else // ! defined(_MSC_VER)
@@ -635,12 +639,6 @@ typedef struct
#define NV_FORCERESULTCHECK
#endif
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
#define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
#else
#define NV_ATTRIBUTE_UNUSED
#endif
/*
* Functions decorated with NV_FORMAT_PRINTF(f, a) have a format string at
* parameter number 'f' and variadic arguments start at parameter number 'a'.

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -143,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
NV_STATUS NV_API_CALL os_release_semaphore (void *);
void* NV_API_CALL os_alloc_rwlock (void);
void NV_API_CALL os_free_rwlock (void *);
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
void NV_API_CALL os_release_rwlock_read (void *);
void NV_API_CALL os_release_rwlock_write (void *);
NvBool NV_API_CALL os_semaphore_may_sleep (void);
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
NvBool NV_API_CALL os_is_isr (void);
@@ -154,10 +162,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU16 *, NvU16 *, NvU16 *, NvU16 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@@ -173,7 +180,6 @@ NV_STATUS NV_API_CALL os_put_page (NvU64 address);
NvU32 NV_API_CALL os_get_page_refcount (NvU64 address);
NvU32 NV_API_CALL os_count_tail_pages (NvU64 address);
void NV_API_CALL os_free_pages_phys (NvU64, NvU32);
NV_STATUS NV_API_CALL os_call_nv_vmbus (NvU32, void *);
NV_STATUS NV_API_CALL os_open_temporary_file (void **);
void NV_API_CALL os_close_file (void *);
NV_STATUS NV_API_CALL os_write_file (void *, NvU8 *, NvU64, NvU64);
@@ -181,7 +187,7 @@ NV_STATUS NV_API_CALL os_read_file (void *, NvU8 *, NvU64, NvU
NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void **);
NV_STATUS NV_API_CALL os_open_and_read_file (const char *, NvU8 *, NvU64);
NvBool NV_API_CALL os_is_nvswitch_present (void);
void NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
NV_STATUS NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
NV_STATUS NV_API_CALL os_alloc_wait_queue (os_wait_queue **);
void NV_API_CALL os_free_wait_queue (os_wait_queue *);
void NV_API_CALL os_wait_uninterruptible (os_wait_queue *);
@@ -200,12 +206,19 @@ enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvU32 os_sev_status;
extern NvBool os_sev_enabled;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
/*

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -56,7 +56,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_get_p2p_caps(nvidia_stack_t *, nvgpuDeviceHan
NV_STATUS NV_API_CALL rm_gpu_ops_memory_cpu_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, NvLength, void **, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_memory_cpu_ummap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, void*);
NV_STATUS NV_API_CALL rm_gpu_ops_channel_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_tsg_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuTsgAllocParams_t *, nvgpuTsgHandle_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_tsg_destroy(nvidia_stack_t *, nvgpuTsgHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_channel_allocate(nvidia_stack_t *, const nvgpuTsgHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_channel_destroy(nvidia_stack_t *, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_memory_free(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64);
NV_STATUS NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuCaps_t);
@@ -74,8 +76,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
@@ -98,4 +101,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
#endif

File diff suppressed because it is too large

View File

@@ -27,6 +27,7 @@
#include "nvidia-drm-helper.h"
#include "nvidia-drm-priv.h"
#include "nvidia-drm-connector.h"
#include "nvidia-drm-crtc.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-encoder.h"
@@ -42,6 +43,7 @@
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
static void nv_drm_connector_destroy(struct drm_connector *connector)
{
@@ -98,7 +100,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
break;
}
#if defined(NV_DRM_CONNECTOR_HAS_OVERRIDE_EDID)
if (connector->override_edid) {
#else
if (drm_edid_override_connector_update(connector) > 0) {
#endif
const struct drm_property_blob *edid = connector->edid_blob_ptr;
if (edid->length <= sizeof(pDetectParams->edid.buffer)) {
@@ -118,6 +124,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
return false;
}
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
drm_connector_attach_vrr_capable_property(&nv_connector->base);
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
#endif
if (pDetectParams->connected) {
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {
@@ -197,6 +208,11 @@ done:
nv_drm_free(pDetectParams);
if (status == connector_status_disconnected &&
nv_connector->modeset_permission_filep) {
nv_drm_connector_revoke_permissions(dev, nv_connector);
}
return status;
}
@@ -298,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
}
static int nv_drm_connector_mode_valid(struct drm_connector *connector,
#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
const struct drm_display_mode *mode)
#else
struct drm_display_mode *mode)
#endif
{
struct drm_device *dev = connector->dev;
struct nv_drm_device *nv_dev = to_nv_device(dev);
@@ -362,6 +382,8 @@ nv_drm_connector_new(struct drm_device *dev,
nv_connector->physicalIndex = physicalIndex;
nv_connector->type = type;
nv_connector->internal = internal;
nv_connector->modeset_permission_filep = NULL;
nv_connector->modeset_permission_crtc = NULL;
strcpy(nv_connector->dpAddress, dpAddress);
@@ -464,4 +486,26 @@ done:
return connector;
}
/*
* Revoke the permissions on this connector.
*/
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
struct nv_drm_connector* nv_connector)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
bool ret = true;
if (nv_connector->modeset_permission_crtc) {
if (nv_connector->nv_detected_encoder) {
ret = nvKms->revokePermissions(
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
nv_connector->nv_detected_encoder->hDisplay);
}
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
nv_connector->modeset_permission_crtc = NULL;
}
nv_connector->modeset_permission_filep = NULL;
return ret;
}
#endif

View File

@@ -51,6 +51,20 @@ struct nv_drm_connector {
atomic_t connection_status_dirty;
/**
* @modeset_permission_filep:
*
* The filep using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
*/
struct drm_file *modeset_permission_filep;
/**
* @modeset_permission_crtc:
*
* The crtc using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
*/
struct nv_drm_crtc *modeset_permission_crtc;
struct drm_connector base;
};
@@ -84,6 +98,9 @@ nv_drm_get_connector(struct drm_device *dev,
NvBool internal,
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH]);
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
struct nv_drm_connector *nv_connector);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* __NVIDIA_DRM_CONNECTOR_H__ */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -44,6 +44,37 @@
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
#include <linux/nvhost.h>
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
#include <linux/host1x-next.h>
#endif
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
static int
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
struct drm_property_blob **blob,
uint64_t blob_id,
ssize_t expected_size)
{
struct drm_property_blob *new_blob = NULL;
if (blob_id != 0) {
new_blob = drm_property_lookup_blob(dev, blob_id);
if (new_blob == NULL) {
return -EINVAL;
}
if ((expected_size > 0) &&
(new_blob->length != expected_size)) {
drm_property_blob_put(new_blob);
return -EINVAL;
}
}
drm_property_replace_blob(blob, new_blob);
drm_property_blob_put(new_blob);
return 0;
}
#endif
static void nv_drm_plane_destroy(struct drm_plane *plane)
@@ -84,9 +115,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
{
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(plane_state);
if (plane_state->fb == NULL) {
cursor_req_config_disable(req_config);
@@ -186,7 +214,6 @@ plane_req_config_update(struct drm_plane *plane,
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(plane_state);
int ret = 0;
if (plane_state->fb == NULL) {
plane_req_config_disable(req_config);
@@ -309,6 +336,9 @@ plane_req_config_update(struct drm_plane *plane,
nv_plane->defaultCompositionMode;
#endif
req_config->config.inputColorSpace =
nv_drm_plane_state->input_colorspace;
req_config->config.syncptParams.preSyncptSpecified = false;
req_config->config.syncptParams.postSyncptRequested = false;
@@ -320,10 +350,10 @@ plane_req_config_update(struct drm_plane *plane,
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
if (plane_state->fence != NULL) {
ret = nvhost_dma_fence_unpack(
plane_state->fence,
&req_config->config.syncptParams.preSyncptId,
&req_config->config.syncptParams.preSyncptValue);
int ret = nvhost_dma_fence_unpack(
plane_state->fence,
&req_config->config.syncptParams.preSyncptId,
&req_config->config.syncptParams.preSyncptValue);
if (ret != 0) {
return ret;
}
@@ -333,12 +363,81 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->fd_user_ptr) {
req_config->config.syncptParams.postSyncptRequested = true;
}
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
if (plane_state->fence != NULL) {
int ret = host1x_fence_extract(
plane_state->fence,
&req_config->config.syncptParams.preSyncptId,
&req_config->config.syncptParams.preSyncptValue);
if (ret != 0) {
return ret;
}
req_config->config.syncptParams.preSyncptSpecified = true;
}
if (nv_drm_plane_state->fd_user_ptr) {
req_config->config.syncptParams.postSyncptRequested = true;
}
#else
return -1;
#endif
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
struct hdr_output_metadata *hdr_metadata =
nv_drm_plane_state->hdr_output_metadata->data;
struct hdr_metadata_infoframe *info_frame =
&hdr_metadata->hdmi_metadata_type1;
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
uint32_t i;
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
return -1;
}
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
req_config->config.hdrMetadata.displayPrimaries[i].x =
info_frame->display_primaries[i].x;
req_config->config.hdrMetadata.displayPrimaries[i].y =
info_frame->display_primaries[i].y;
}
req_config->config.hdrMetadata.whitePoint.x =
info_frame->white_point.x;
req_config->config.hdrMetadata.whitePoint.y =
info_frame->white_point.y;
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
info_frame->max_display_mastering_luminance;
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
info_frame->min_display_mastering_luminance;
req_config->config.hdrMetadata.maxCLL =
info_frame->max_cll;
req_config->config.hdrMetadata.maxFALL =
info_frame->max_fall;
req_config->config.hdrMetadataSpecified = true;
switch (info_frame->eotf) {
case HDMI_EOTF_SMPTE_ST2084:
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
break;
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
req_config->config.tf =
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
break;
default:
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
return -1;
}
} else {
req_config->config.hdrMetadataSpecified = false;
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
}
#endif
/*
* Unconditionally mark the surface as changed, even if nothing changed,
* so that we always get a flip event: a DRM client may flip with
@@ -509,9 +608,21 @@ static int nv_drm_plane_atomic_set_property(
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
#endif
return 0;
} else {
return -EINVAL;
} else if (property == nv_dev->nv_input_colorspace_property) {
nv_drm_plane_state->input_colorspace = val;
return 0;
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
else if (property == nv_dev->nv_hdr_output_metadata_property) {
return nv_drm_atomic_replace_property_blob_from_id(
nv_dev->dev,
&nv_drm_plane_state->hdr_output_metadata,
val,
sizeof(struct hdr_output_metadata));
}
#endif
return -EINVAL;
}
static int nv_drm_plane_atomic_get_property(
@@ -521,12 +632,26 @@ static int nv_drm_plane_atomic_get_property(
uint64_t *val)
{
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
const struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state_const(state);
if (property == nv_dev->nv_out_fence_property) {
return 0;
} else {
return -EINVAL;
} else if (property == nv_dev->nv_input_colorspace_property) {
*val = nv_drm_plane_state->input_colorspace;
return 0;
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
else if (property == nv_dev->nv_hdr_output_metadata_property) {
const struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state_const(state);
*val = nv_drm_plane_state->hdr_output_metadata ?
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
return 0;
}
#endif
return -EINVAL;
}
static struct drm_plane_state *
@@ -544,6 +669,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
if (nv_plane_state->hdr_output_metadata) {
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
}
#endif
return &nv_plane_state->base;
}
@@ -557,6 +690,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
#else
__drm_atomic_helper_plane_destroy_state(state);
#endif
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(state);
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
#endif
}
static void nv_drm_plane_atomic_destroy_state(
@@ -803,7 +942,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
};
static void nv_drm_plane_install_properties(
struct drm_plane *plane)
struct drm_plane *plane,
NvBool supportsHDR)
{
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
@@ -811,6 +951,19 @@ static void nv_drm_plane_install_properties(
drm_object_attach_property(
&plane->base, nv_dev->nv_out_fence_property, 0);
}
if (nv_dev->nv_input_colorspace_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_input_colorspace_property,
NVKMS_INPUT_COLORSPACE_NONE);
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
}
#endif
}
static void
@@ -990,7 +1143,9 @@ nv_drm_plane_create(struct drm_device *dev,
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
nv_drm_plane_install_properties(plane);
nv_drm_plane_install_properties(
plane,
pResInfo->supportsHDR[layer_idx]);
}
__nv_drm_plane_create_alpha_blending_properties(
@@ -1043,6 +1198,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
nv_crtc->head = head;
INIT_LIST_HEAD(&nv_crtc->flip_list);
spin_lock_init(&nv_crtc->flip_list_lock);
nv_crtc->modeset_permission_filep = NULL;
ret = drm_crtc_init_with_planes(nv_dev->dev,
&nv_crtc->base,
@@ -1141,11 +1297,13 @@ void nv_drm_enumerate_crtcs_and_planes(
}
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
struct drm_plane *overlay_plane = NULL;
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
continue;
}
struct drm_plane *overlay_plane =
overlay_plane =
nv_drm_plane_create(nv_dev->dev,
DRM_PLANE_TYPE_OVERLAY,
layer,
@@ -1189,7 +1347,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
return -ENOENT;
}
crtc = nv_drm_crtc_find(dev, params->crtc_id);
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
if (!crtc) {
return -ENOENT;
}
@@ -1217,7 +1375,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
return -ENOENT;
}
crtc = nv_drm_crtc_find(dev, params->crtc_id);
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
if (!crtc) {
return -ENOENT;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,38 +35,9 @@
#include <drm/drm_crtc.h>
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
/* For DRM_ROTATE_* , DRM_REFLECT_* */
#include <drm/drm_blend.h>
#endif
#if defined(NV_DRM_ROTATION_AVAILABLE)
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
#include <uapi/drm/drm_mode.h>
#endif
#include "nvtypes.h"
#include "nvkms-kapi.h"
#if defined(NV_DRM_ROTATION_AVAILABLE)
/*
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
* DRM_ROTATE_* and DRM_MODE_REFLECT_*
*/
#if !defined(DRM_MODE_ROTATE_0)
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
#endif
#endif //NV_DRM_ROTATION_AVAILABLE
struct nv_drm_crtc {
NvU32 head;
@@ -85,6 +56,13 @@ struct nv_drm_crtc {
*/
spinlock_t flip_list_lock;
/**
* @modeset_permission_filep:
*
* The filep using this crtc with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
*/
struct drm_file *modeset_permission_filep;
struct drm_crtc base;
};
@@ -205,6 +183,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
struct nv_drm_plane_state {
struct drm_plane_state base;
s32 __user *fd_user_ptr;
enum NvKmsInputColorSpace input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property_blob *hdr_output_metadata;
#endif
};
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
@@ -212,6 +194,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
return container_of(state, struct nv_drm_plane_state, base);
}
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
{
return container_of(state, const struct nv_drm_plane_state, base);
}
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
{
if (crtc == NULL) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +30,7 @@
#include "nvidia-drm-connector.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-crtc.h"
#include "nvidia-drm-prime-fence.h"
#include "nvidia-drm-fence.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-gem-user-memory.h"
@@ -86,8 +86,26 @@
static struct nv_drm_device *dev_list = NULL;
static const char* nv_get_input_colorspace_name(
enum NvKmsInputColorSpace colorSpace)
{
switch (colorSpace) {
case NVKMS_INPUT_COLORSPACE_NONE:
return "None";
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
return "IEC 61966-2-2 linear FP";
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
return "ITU-R BT.2100-PQ YCbCr";
default:
/* We shouldn't hit this */
WARN_ON("Unsupported input colorspace");
return "None";
}
};
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
static void nv_drm_output_poll_changed(struct drm_device *dev)
{
struct drm_connector *connector = NULL;
@@ -131,15 +149,19 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
}
#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */
static struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
const struct drm_mode_fb_cmd2 *cmd
#else
#else
struct drm_mode_fb_cmd2 *cmd
#endif
#endif
)
{
struct drm_mode_fb_cmd2 local_cmd;
@@ -150,11 +172,14 @@ static struct drm_framebuffer *nv_drm_framebuffer_create(
fb = nv_drm_internal_framebuffer_create(
dev,
file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
info,
#endif
&local_cmd);
#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
*cmd = local_cmd;
#endif
#endif
return fb;
}
@@ -168,7 +193,9 @@ static const struct drm_mode_config_funcs nv_mode_config_funcs = {
.atomic_check = nv_drm_atomic_check,
.atomic_commit = nv_drm_atomic_commit,
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
.output_poll_changed = nv_drm_output_poll_changed,
#endif
};
static void nv_drm_event_callback(const struct NvKmsKapiEvent *event)
@@ -240,10 +267,6 @@ nv_drm_init_mode_config(struct nv_drm_device *nv_dev,
dev->mode_config.preferred_depth = 24;
dev->mode_config.prefer_shadow = 1;
/* Currently unused. Update when needed. */
dev->mode_config.fb_base = 0;
#if defined(NV_DRM_CRTC_STATE_HAS_ASYNC_FLIP) || \
defined(NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS)
dev->mode_config.async_page_flip = true;
@@ -332,6 +355,15 @@ static void nv_drm_enumerate_encoders_and_connectors
*/
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
{
struct drm_prop_enum_list enum_list[3] = { };
int i, len = 0;
for (i = 0; i < 3; i++) {
enum_list[len].type = i;
enum_list[len].name = nv_get_input_colorspace_name(i);
len++;
}
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
if (!nv_dev->supportsSyncpts) {
return 0;
@@ -345,6 +377,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
}
#endif
nv_dev->nv_input_colorspace_property =
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
enum_list, len);
if (nv_dev->nv_input_colorspace_property == NULL) {
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
return -ENOMEM;
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
nv_dev->nv_hdr_output_metadata_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_HDR_STATIC_METADATA", 0);
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
return -ENOMEM;
}
#endif
return 0;
}
@@ -667,6 +716,16 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
return 0;
}
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
/* Check the pDevice, since it only gets set if modeset=1,
* which is a requirement for the dma_buf extension to work.
*/
struct nv_drm_device *nv_dev = to_nv_device(dev);
return nv_dev->pDevice ? 0 : -EINVAL;
}
static
int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
@@ -696,6 +755,455 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
return 0;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
NvU32 dpyId)
{
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
return nv_connector->nv_detected_encoder &&
nv_connector->nv_detected_encoder->hDisplay == dpyId;
}
static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep)
{
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
// Importantly, drm_connector_lookup (with filep) will only return the
// connector if we are master, a lessee with the connector, or not master at
// all. It will return NULL if we are a lessee with other connectors.
struct drm_connector *connector =
nv_drm_connector_lookup(dev, filep, params->connectorId);
struct nv_drm_connector *nv_connector;
int ret = 0;
if (!connector) {
return -EINVAL;
}
nv_connector = to_nv_connector(connector);
if (!nv_connector) {
ret = -EINVAL;
goto done;
}
if (!nv_connector->nv_detected_encoder) {
ret = -EINVAL;
goto done;
}
params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
done:
nv_drm_connector_put(connector);
return ret;
}
static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep)
{
struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
struct drm_connector *connector;
int ret = -EINVAL;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
/* Lookup for existing connector with same dpyId */
nv_drm_for_each_connector(connector, &conn_iter, dev) {
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
params->connectorId = connector->base.id;
ret = 0;
break;
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
return ret;
}
static NvU32 nv_drm_get_head_bit_from_connector(struct drm_connector *connector)
{
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
if (connector->state && connector->state->crtc) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(connector->state->crtc);
return NVBIT(nv_crtc->head);
} else if (nv_connector->nv_detected_encoder &&
nv_connector->nv_detected_encoder->base.crtc) {
struct nv_drm_crtc *nv_crtc =
to_nv_crtc(nv_connector->nv_detected_encoder->base.crtc);
return NVBIT(nv_crtc->head);
}
return 0;
}
static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
struct drm_file *filep)
{
struct drm_nvidia_grant_permissions_params *params = data;
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct nv_drm_connector *target_nv_connector = NULL;
struct nv_drm_crtc *target_nv_crtc = NULL;
struct drm_connector *connector, *target_connector = NULL;
struct drm_crtc *crtc;
NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
int ret = 0;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
struct drm_modeset_acquire_ctx ctx;
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
ret);
#else
mutex_lock(&dev->mode_config.mutex);
#endif
/* Get the connector for the dpyId. */
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
target_connector =
nv_drm_connector_lookup(dev, filep, connector->base.id);
break;
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
// Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
// return the object if we are master, a lessee with the object, or not
// master at all. It will return NULL if we are a lessee with other objects.
if (!target_connector) {
ret = -EINVAL;
goto done;
}
target_nv_connector = to_nv_connector(target_connector);
possible_crtcs =
target_nv_connector->nv_detected_encoder->base.possible_crtcs;
/* Target connector must not be previously granted. */
if (target_nv_connector->modeset_permission_filep) {
ret = -EINVAL;
goto done;
}
/* Add all heads that are owned and not already granted. */
freeHeadBits = 0;
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
!nv_crtc->modeset_permission_filep &&
(drm_crtc_mask(crtc) & possible_crtcs)) {
freeHeadBits |= NVBIT(nv_crtc->head);
}
}
targetHeadBit = nv_drm_get_head_bit_from_connector(target_connector);
if (targetHeadBit & freeHeadBits) {
/* If a crtc is already being used by this connector, use it. */
freeHeadBits = targetHeadBit;
} else {
/* Otherwise, remove heads that are in use by other connectors. */
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
}
/* Fail if no heads are available. */
if (!freeHeadBits) {
ret = -EINVAL;
goto done;
}
/*
* Loop through the crtc again and find a matching head.
* Record the filep that is using the crtc and the connector.
*/
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (freeHeadBits & NVBIT(nv_crtc->head)) {
target_nv_crtc = nv_crtc;
head = nv_crtc->head;
break;
}
}
if (!nvKms->grantPermissions(params->fd, nv_dev->pDevice, head,
params->dpyId)) {
ret = -EINVAL;
goto done;
}
target_nv_connector->modeset_permission_crtc = target_nv_crtc;
target_nv_connector->modeset_permission_filep = filep;
target_nv_crtc->modeset_permission_filep = filep;
done:
if (target_connector) {
nv_drm_connector_put(target_connector);
}
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
#else
mutex_unlock(&dev->mode_config.mutex);
#endif
return ret;
}
static bool nv_drm_revoke_connector(struct nv_drm_device *nv_dev,
struct nv_drm_connector *nv_connector)
{
bool ret = true;
if (nv_connector->modeset_permission_crtc) {
if (nv_connector->nv_detected_encoder) {
ret = nvKms->revokePermissions(
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
nv_connector->nv_detected_encoder->hDisplay);
}
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
nv_connector->modeset_permission_crtc = NULL;
}
nv_connector->modeset_permission_filep = NULL;
return ret;
}
static int nv_drm_revoke_permission(struct drm_device *dev,
struct drm_file *filep, NvU32 dpyId)
{
struct drm_connector *connector;
struct drm_crtc *crtc;
int ret = 0;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
struct drm_modeset_acquire_ctx ctx;
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
ret);
#else
mutex_lock(&dev->mode_config.mutex);
#endif
/*
* If dpyId is set, only revoke those specific resources. Otherwise,
* it is from closing the file so revoke all resources for that filep.
*/
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
if (nv_connector->modeset_permission_filep == filep &&
(!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
if (!nv_drm_connector_revoke_permissions(dev, nv_connector)) {
ret = -EINVAL;
// Continue trying to revoke as much as possible.
}
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (nv_crtc->modeset_permission_filep == filep && !dpyId) {
nv_crtc->modeset_permission_filep = NULL;
}
}
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
#else
mutex_unlock(&dev->mode_config.mutex);
#endif
return ret;
}
static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
struct drm_file *filep)
{
struct drm_nvidia_revoke_permissions_params *params = data;
if (!params->dpyId) {
return -EINVAL;
}
return nv_drm_revoke_permission(dev, filep, params->dpyId);
}
static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
{
/*
 * Some systems, such as Android, can reach here without the device having
 * been initialized, so check for that.
*/
if (dev->mode_config.num_crtc > 0 &&
dev->mode_config.crtc_list.next != NULL &&
dev->mode_config.crtc_list.prev != NULL &&
dev->mode_config.num_connector > 0 &&
dev->mode_config.connector_list.next != NULL &&
dev->mode_config.connector_list.prev != NULL) {
nv_drm_revoke_permission(dev, filep, 0);
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
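Taken together, the grant and revoke paths above let a DRM master hand a single dpy to another client without leasing out the whole device. A minimal userspace sketch of that flow, assuming the UAPI header shown further below (nvidia-drm-ioctl.h) is available and that dpy_id came from DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID; the helper name and error handling are illustrative only:

/* Illustrative userspace flow; not part of the driver source. */
#include <stdint.h>
#include <sys/ioctl.h>
#include "nvidia-drm-ioctl.h"

static int grant_then_revoke(int master_fd, int client_fd, uint32_t dpy_id)
{
    struct drm_nvidia_grant_permissions_params grant = {
        .fd = client_fd, /* DRM fd of the client receiving modeset rights */
        .dpyId = dpy_id,
    };
    struct drm_nvidia_revoke_permissions_params revoke = {
        .dpyId = dpy_id,
    };

    if (ioctl(master_fd, DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS, &grant) != 0) {
        return -1;
    }

    /* ... the client drives the dpy; closing master_fd would also revoke ... */

    return ioctl(master_fd, DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS, &revoke);
}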
#if defined(NV_DRM_MASTER_HAS_LEASES)
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
int lessee_id)
{
int object;
void *entry;
while (master->lessor != NULL) {
master = master->lessor;
}
idr_for_each_entry(&master->lessee_idr, entry, object)
{
if (object == lessee_id) {
return entry;
}
}
return NULL;
}
static void nv_drm_get_revoked_objects(struct drm_device *dev,
struct drm_file *filep, unsigned int cmd,
unsigned long arg, int **objects,
int *objects_count)
{
unsigned int ioc_size;
struct drm_mode_revoke_lease revoke_lease;
struct drm_master *lessor, *lessee;
void *entry;
int *objs;
int obj, obj_count, obj_i;
ioc_size = _IOC_SIZE(cmd);
if (ioc_size > sizeof(revoke_lease)) {
return;
}
if (copy_from_user(&revoke_lease, (void __user *)arg, ioc_size) != 0) {
return;
}
lessor = nv_drm_file_get_master(filep);
if (lessor == NULL) {
return;
}
mutex_lock(&dev->mode_config.idr_mutex);
lessee = nv_drm_find_lessee(lessor, revoke_lease.lessee_id);
if (lessee == NULL) {
goto done;
}
obj_count = 0;
idr_for_each_entry(&lessee->leases, entry, obj) {
++obj_count;
}
if (obj_count == 0) {
goto done;
}
objs = nv_drm_calloc(obj_count, sizeof(int));
if (objs == NULL) {
goto done;
}
obj_i = 0;
idr_for_each_entry(&lessee->leases, entry, obj) {
objs[obj_i++] = obj;
}
*objects = objs;
*objects_count = obj_count;
done:
mutex_unlock(&dev->mode_config.idr_mutex);
drm_master_put(&lessor);
}
static bool nv_drm_is_in_objects(int object, int *objects, int objects_count)
{
int i;
for (i = 0; i < objects_count; ++i) {
if (objects[i] == object) {
return true;
}
}
return false;
}
static void nv_drm_finish_revoking_objects(struct drm_device *dev,
struct drm_file *filep, int *objects,
int objects_count)
{
struct drm_connector *connector;
struct drm_crtc *crtc;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
int ret = 0;
struct drm_modeset_acquire_ctx ctx;
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
ret);
#else
mutex_lock(&dev->mode_config.mutex);
#endif
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
if (nv_connector->modeset_permission_filep &&
nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
nv_drm_connector_revoke_permissions(dev, nv_connector);
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (nv_crtc->modeset_permission_filep &&
nv_drm_is_in_objects(crtc->base.id, objects, objects_count)) {
nv_crtc->modeset_permission_filep = NULL;
}
}
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
#else
mutex_unlock(&dev->mode_config.mutex);
#endif
}
#endif /* NV_DRM_MASTER_HAS_LEASES */
#if defined(NV_DRM_BUS_PRESENT)
#if defined(NV_DRM_BUS_HAS_GET_IRQ)
@@ -727,12 +1235,50 @@ static struct drm_bus nv_drm_bus = {
#endif /* NV_DRM_BUS_PRESENT */
/*
 * Wrapper around drm_ioctl to hook into upstream ioctls.
 *
 * Currently used to add additional handling to REVOKE_LEASE: the leased
 * object IDs are recorded before the core destroys the lease, so that the
 * driver can clear its own permission state afterward.
*/
static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
long retcode;
#if defined(NV_DRM_MASTER_HAS_LEASES)
struct drm_file *file_priv = filp->private_data;
struct drm_device *dev = file_priv->minor->dev;
int *objects = NULL;
int objects_count = 0;
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE) {
// Save the revoked objects before revoking.
nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
&objects_count);
}
#endif
retcode = drm_ioctl(filp, cmd, arg);
#if defined(NV_DRM_MASTER_HAS_LEASES)
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
if (retcode == 0) {
// If revoking was successful, finish revoking the objects.
nv_drm_finish_revoking_objects(dev, file_priv, objects,
objects_count);
}
nv_drm_free(objects);
}
#endif
return retcode;
}
static const struct file_operations nv_drm_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.unlocked_ioctl = nv_drm_ioctl,
#if defined(CONFIG_COMPAT)
.compat_ioctl = drm_compat_ioctl,
#endif
@@ -745,6 +1291,10 @@ static const struct file_operations nv_drm_fops = {
.read = drm_read,
.llseek = noop_llseek,
#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
static const struct drm_ioctl_desc nv_drm_ioctls[] = {
@@ -768,17 +1318,29 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
nv_drm_fence_supported_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_CONTEXT_CREATE,
nv_drm_fence_context_create_ioctl,
DRM_IOCTL_DEF_DRV(NVIDIA_PRIME_FENCE_CONTEXT_CREATE,
nv_drm_prime_fence_context_create_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_FENCE_ATTACH,
nv_drm_gem_fence_attach_ioctl,
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_PRIME_FENCE_ATTACH,
nv_drm_gem_prime_fence_attach_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#endif
/*
* DRM_UNLOCKED is implicit for all non-legacy DRM driver IOCTLs since Linux
* v4.10 commit fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions"
* (Linux v4.4 commit ea487835e887 "drm: Enforce unlocked ioctl operation
* for kms driver ioctls" previously did it only for drivers that set the
* DRM_MODESET flag), so this will race with SET_CLIENT_CAP. Linux v4.11
* commit dcf727ab5d17 "drm: setclientcap doesn't need the drm BKL" also
* removed locking from SET_CLIENT_CAP so there is no use attempting to lock
* manually. The latter commit acknowledges that this can expose userspace
* to inconsistent behavior when racing with itself, but accepts that risk.
*/
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CLIENT_CAPABILITY,
nv_drm_get_client_capability_ioctl,
0),
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CRTC_CRC32,
nv_drm_get_crtc_crc32_ioctl,
@@ -798,6 +1360,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IDENTIFY_OBJECT,
nv_drm_gem_identify_object_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_DMABUF_SUPPORTED,
nv_drm_dmabuf_supported_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID,
nv_drm_get_dpy_id_for_connector_id_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID,
nv_drm_get_connector_id_for_dpy_id_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_GRANT_PERMISSIONS,
nv_drm_grant_permission_ioctl,
DRM_UNLOCKED|DRM_MASTER),
DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
nv_drm_revoke_permission_ioctl,
DRM_UNLOCKED|DRM_MASTER),
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
};
@@ -816,8 +1393,23 @@ static struct drm_driver nv_drm_driver = {
.ioctls = nv_drm_ioctls,
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
/*
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
* conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle().
*
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
* all drivers") made these helpers the default when .prime_handle_to_fd /
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
* them if the helpers aren't present.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
#endif
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_fd_to_handle
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
#endif
.gem_prime_import = nv_drm_gem_prime_import,
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
@@ -840,6 +1432,9 @@ static struct drm_driver nv_drm_driver = {
.load = nv_drm_load,
.unload = nv_drm_unload,
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
.postclose = nv_drm_postclose,
#endif
.fops = &nv_drm_fops,
@@ -850,7 +1445,10 @@ static struct drm_driver nv_drm_driver = {
.name = "nvidia-drm",
.desc = "NVIDIA DRM driver",
#if defined(NV_DRM_DRIVER_HAS_DATE)
.date = "20160202",
#endif
#if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
.device_list = LIST_HEAD_INIT(nv_drm_driver.device_list),
@@ -882,7 +1480,9 @@ static void nv_drm_update_drm_driver_features(void)
nv_drm_driver.dumb_create = nv_drm_dumb_create;
nv_drm_driver.dumb_map_offset = nv_drm_dumb_map_offset;
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
nv_drm_driver.dumb_destroy = nv_drm_dumb_destroy;
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
}


@@ -205,7 +205,7 @@ nv_drm_add_encoder(struct drm_device *dev, NvKmsKapiDisplay hDisplay)
encoder = nv_drm_encoder_new(dev,
displayInfo->handle,
connectorInfo->signalFormat,
get_crtc_mask(dev, connectorInfo->headMask));
get_crtc_mask(dev, displayInfo->headMask));
if (IS_ERR(encoder)) {
ret = PTR_ERR(encoder);


@@ -150,6 +150,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
if (nv_fb->nv_gem[i] != NULL) {
if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
nv_fb->nv_gem[i]->pMemory)) {
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Framebuffer memory not appropriate for scanout");
goto fail;
}
params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
params.planes[i].offset = nv_fb->base.offsets[i];
params.planes[i].pitch = nv_fb->base.pitches[i];
@@ -164,6 +172,17 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
params.layout = (modifier & 0x10) ?
NvKmsSurfaceMemoryLayoutBlockLinear :
NvKmsSurfaceMemoryLayoutPitch;
// See definition of DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D, we are testing
// 'c', the lossless compression field of the modifier
if (params.layout == NvKmsSurfaceMemoryLayoutBlockLinear &&
(modifier >> 23) & 0x7) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Cannot create FB from compressible surface allocation");
goto fail;
}
params.log2GobsPerBlockY = modifier & 0xf;
} else {
params.explicit_layout = false;
@@ -174,16 +193,22 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, &params);
if (nv_fb->pSurface == NULL) {
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Failed to create NvKmsKapiSurface");
drm_framebuffer_cleanup(&nv_fb->base);
return -EINVAL;
goto fail;
}
return 0;
fail:
drm_framebuffer_cleanup(&nv_fb->base);
return -EINVAL;
}
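For reference, the bit tests in nv_drm_framebuffer_init() above follow the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) packing from upstream drm_fourcc.h. A small decode sketch, with field positions taken from that header (illustrative, not driver code):

#include <stdbool.h>
#include <stdint.h>

static inline bool modifier_is_compressed_block_linear(uint64_t modifier)
{
    bool block_linear = (modifier & 0x10) != 0;     /* the layout test above */
    uint8_t log2_gobs_per_block_y = modifier & 0xf; /* 'h' -> log2GobsPerBlockY */
    uint8_t compression = (modifier >> 23) & 0x7;   /* 'c', rejected when non-zero */

    (void)log2_gobs_per_block_y;
    return block_linear && (compression != 0);
}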
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
struct drm_mode_fb_cmd2 *cmd)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
@@ -237,6 +262,9 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
dev,
#endif
&nv_fb->base,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
info,
#endif
cmd);
/*


@@ -59,6 +59,9 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer(
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
struct drm_mode_fb_cmd2 *cmd);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */


@@ -31,17 +31,28 @@
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-prime-fence.h"
#include "nvidia-drm-fence.h"
#include "nvidia-dma-resv-helper.h"
#if defined(NV_DRM_FENCE_AVAILABLE)
#include "nvidia-dma-fence-helper.h"
struct nv_drm_fence_context {
struct nv_drm_device *nv_dev;
struct nv_drm_fence_context;
struct nv_drm_fence_context_ops {
void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
};
struct nv_drm_fence_context {
const struct nv_drm_fence_context_ops *ops;
struct nv_drm_device *nv_dev;
uint32_t context;
};
struct nv_drm_prime_fence_context {
struct nv_drm_fence_context base;
NvU64 fenceSemIndex; /* Index into semaphore surface */
@@ -53,10 +64,10 @@ struct nv_drm_fence_context {
spinlock_t lock;
/*
* Software signaling structures. __nv_drm_fence_context_new()
* allocates channel event and __nv_drm_fence_context_destroy() frees it.
* There are no simultaneous read/write access to 'cb', therefore it does
* not require spin-lock protection.
 * Software signaling structures. __nv_drm_prime_fence_context_new()
 * allocates the channel event and __nv_drm_prime_fence_context_destroy()
 * frees it. There is no simultaneous read/write access to 'cb', so it
 * does not require spin-lock protection.
*/
struct NvKmsKapiChannelEvent *cb;
@@ -79,7 +90,7 @@ struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
}
static const char*
nv_drm_gem_prime_fence_op_get_driver_name(nv_dma_fence_t *fence)
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
{
return "NVIDIA";
}
@@ -122,7 +133,7 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
}
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
.get_driver_name = nv_drm_gem_prime_fence_op_get_driver_name,
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
.release = nv_drm_gem_prime_fence_op_release,
@@ -138,7 +149,7 @@ __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
}
static void nv_drm_gem_prime_force_fence_signal(
struct nv_drm_fence_context *nv_fence_context)
struct nv_drm_prime_fence_context *nv_fence_context)
{
WARN_ON(!spin_is_locked(&nv_fence_context->lock));
@@ -158,7 +169,7 @@ static void nv_drm_gem_prime_fence_event
NvU32 dataU32
)
{
struct nv_drm_fence_context *nv_fence_context = dataPtr;
struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
spin_lock(&nv_fence_context->lock);
@@ -187,11 +198,53 @@ static void nv_drm_gem_prime_fence_event
spin_unlock(&nv_fence_context->lock);
}
static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
struct nv_drm_device *nv_dev,
struct drm_nvidia_fence_context_create_params *p)
static inline struct nv_drm_prime_fence_context*
to_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
return (struct nv_drm_prime_fence_context *)nv_fence_context;
}
static void __nv_drm_prime_fence_context_destroy(
struct nv_drm_fence_context *nv_fence_context)
{
struct nv_drm_fence_context *nv_fence_context;
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
struct nv_drm_prime_fence_context *nv_prime_fence_context =
to_prime_fence_context(nv_fence_context);
/*
 * Free the channel event before destroying the fence context; otherwise the
 * event callback continues to get called.
*/
nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
/* Force signal all pending fences and empty pending list */
spin_lock(&nv_prime_fence_context->lock);
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
spin_unlock(&nv_prime_fence_context->lock);
/* Free nvkms resources */
nvKms->unmapMemory(nv_dev->pDevice,
nv_prime_fence_context->pSemSurface,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
(void *) nv_prime_fence_context->pLinearAddress);
nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
nv_drm_free(nv_fence_context);
}
static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
.destroy = __nv_drm_prime_fence_context_destroy,
};
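The refactor above turns the fence context into a small base type with an ops vtable; __nv_drm_gem_fence_context_free() below dispatches teardown through ops->destroy(), so new context flavors only need to embed the base and supply their own ops. A hypothetical sketch (the "example" type is an illustration, not part of this change):

struct nv_drm_example_fence_context {
    struct nv_drm_fence_context base; /* must be first so the base pointer can be downcast */
    /* ... flavor-specific state ... */
};

static void __nv_drm_example_fence_context_destroy(
    struct nv_drm_fence_context *nv_fence_context)
{
    struct nv_drm_example_fence_context *ctx =
        (struct nv_drm_example_fence_context *)nv_fence_context;
    /* ... release flavor-specific state ... */
    nv_drm_free(ctx);
}

static struct nv_drm_fence_context_ops nv_drm_example_fence_context_ops = {
    .destroy = __nv_drm_example_fence_context_destroy,
};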
static inline struct nv_drm_prime_fence_context *
__nv_drm_prime_fence_context_new(
struct nv_drm_device *nv_dev,
struct drm_nvidia_prime_fence_context_create_params *p)
{
struct nv_drm_prime_fence_context *nv_prime_fence_context;
struct NvKmsKapiMemory *pSemSurface;
NvU32 *pLinearAddress;
@@ -225,9 +278,9 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
* event for it.
*/
if ((nv_fence_context = nv_drm_calloc(
if ((nv_prime_fence_context = nv_drm_calloc(
1,
sizeof(*nv_fence_context))) == NULL) {
sizeof(*nv_prime_fence_context))) == NULL) {
goto failed_alloc_fence_context;
}
@@ -236,17 +289,18 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
* to check a return value.
*/
*nv_fence_context = (struct nv_drm_fence_context) {
.nv_dev = nv_dev,
.context = nv_dma_fence_context_alloc(1),
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
.base.ops = &nv_drm_prime_fence_context_ops,
.base.nv_dev = nv_dev,
.base.context = nv_dma_fence_context_alloc(1),
.pSemSurface = pSemSurface,
.pLinearAddress = pLinearAddress,
.fenceSemIndex = p->index,
};
INIT_LIST_HEAD(&nv_fence_context->pending);
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
spin_lock_init(&nv_fence_context->lock);
spin_lock_init(&nv_prime_fence_context->lock);
/*
* Except 'cb', the fence context should be completely initialized
@@ -256,22 +310,22 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
* There are no simultaneous read/write access to 'cb', therefore it does
* not require spin-lock protection.
*/
nv_fence_context->cb =
nv_prime_fence_context->cb =
nvKms->allocateChannelEvent(nv_dev->pDevice,
nv_drm_gem_prime_fence_event,
nv_fence_context,
nv_prime_fence_context,
p->event_nvkms_params_ptr,
p->event_nvkms_params_size);
if (!nv_fence_context->cb) {
if (!nv_prime_fence_context->cb) {
NV_DRM_DEV_LOG_ERR(nv_dev,
"Failed to allocate fence signaling event");
goto failed_to_allocate_channel_event;
}
return nv_fence_context;
return nv_prime_fence_context;
failed_to_allocate_channel_event:
nv_drm_free(nv_fence_context);
nv_drm_free(nv_prime_fence_context);
failed_alloc_fence_context:
@@ -287,38 +341,8 @@ failed:
return NULL;
}
static void __nv_drm_fence_context_destroy(
struct nv_drm_fence_context *nv_fence_context)
{
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
/*
* Free channel event before destroying the fence context, otherwise event
* callback continue to get called.
*/
nvKms->freeChannelEvent(nv_dev->pDevice, nv_fence_context->cb);
/* Force signal all pending fences and empty pending list */
spin_lock(&nv_fence_context->lock);
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
spin_unlock(&nv_fence_context->lock);
/* Free nvkms resources */
nvKms->unmapMemory(nv_dev->pDevice,
nv_fence_context->pSemSurface,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
(void *) nv_fence_context->pLinearAddress);
nvKms->freeMemory(nv_dev->pDevice, nv_fence_context->pSemSurface);
nv_drm_free(nv_fence_context);
}
static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
struct nv_drm_fence_context *nv_fence_context,
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
struct nv_drm_prime_fence_context *nv_prime_fence_context,
unsigned int seqno)
{
struct nv_drm_prime_fence *nv_fence;
@@ -329,14 +353,14 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
goto out;
}
spin_lock(&nv_fence_context->lock);
spin_lock(&nv_prime_fence_context->lock);
/*
* If seqno wrapped, force signal fences to make sure none of them
* get stuck.
*/
if (seqno < nv_fence_context->last_seqno) {
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
if (seqno < nv_prime_fence_context->last_seqno) {
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
}
INIT_LIST_HEAD(&nv_fence->list_entry);
@@ -344,14 +368,17 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
spin_lock_init(&nv_fence->lock);
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
&nv_fence->lock, nv_fence_context->context,
&nv_fence->lock, nv_prime_fence_context->base.context,
seqno);
list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
/* The context maintains a reference to any pending fences. */
nv_dma_fence_get(&nv_fence->base);
nv_fence_context->last_seqno = seqno;
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
spin_unlock(&nv_fence_context->lock);
nv_prime_fence_context->last_seqno = seqno;
spin_unlock(&nv_prime_fence_context->lock);
out:
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
@@ -385,12 +412,15 @@ static inline struct nv_drm_gem_fence_context *to_gem_fence_context(
 * because the teardown sequence flushes all existing
 * worker threads.
*/
static void __nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
static void
__nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
{
struct nv_drm_gem_fence_context *nv_gem_fence_context =
to_gem_fence_context(nv_gem);
struct nv_drm_fence_context *nv_fence_context =
nv_gem_fence_context->nv_fence_context;
__nv_drm_fence_context_destroy(nv_gem_fence_context->nv_fence_context);
nv_fence_context->ops->destroy(nv_fence_context);
nv_drm_free(nv_gem_fence_context);
}
@@ -400,7 +430,8 @@ const struct nv_drm_gem_object_funcs nv_gem_fence_context_ops = {
};
static inline
struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
struct nv_drm_gem_fence_context *
__nv_drm_gem_object_fence_context_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
@@ -416,11 +447,13 @@ struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
return to_gem_fence_context(nv_gem);
}
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
static int
__nv_drm_gem_fence_context_create(struct drm_device *dev,
struct nv_drm_fence_context *nv_fence_context,
u32 *handle,
struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_fence_context_create_params *p = data;
struct nv_drm_gem_fence_context *nv_gem_fence_context = NULL;
if ((nv_gem_fence_context = nv_drm_calloc(
@@ -429,10 +462,7 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
goto done;
}
if ((nv_gem_fence_context->nv_fence_context =
__nv_drm_fence_context_new(nv_dev, p)) == NULL) {
goto fence_context_new_failed;
}
nv_gem_fence_context->nv_fence_context = nv_fence_context;
nv_drm_gem_object_init(nv_dev,
&nv_gem_fence_context->base,
@@ -442,26 +472,51 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
return nv_drm_gem_handle_create_drop_reference(filep,
&nv_gem_fence_context->base,
&p->handle);
fence_context_new_failed:
nv_drm_free(nv_gem_fence_context);
handle);
done:
return -ENOMEM;
}
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_prime_fence_context_create_params *p = data;
struct nv_drm_prime_fence_context *nv_prime_fence_context =
__nv_drm_prime_fence_context_new(nv_dev, p);
int err;
if (!nv_prime_fence_context) {
goto done;
}
err = __nv_drm_gem_fence_context_create(dev,
&nv_prime_fence_context->base,
&p->handle,
filep);
if (err) {
__nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
}
return err;
done:
return -ENOMEM;
}
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
int ret = -EINVAL;
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_gem_fence_attach_params *p = data;
struct drm_nvidia_gem_prime_fence_attach_params *p = data;
struct nv_drm_gem_object *nv_gem;
struct nv_drm_gem_fence_context *nv_gem_fence_context;
nv_dma_fence_t *fence;
nv_dma_resv_t *resv;
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
@@ -487,9 +542,22 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
goto fence_context_lookup_failed;
}
if (IS_ERR(fence = __nv_drm_fence_context_create_fence(
nv_gem_fence_context->nv_fence_context,
p->sem_thresh))) {
if (nv_gem_fence_context->nv_fence_context->ops !=
&nv_drm_prime_fence_context_ops) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Wrong fence context type: 0x%08x",
p->fence_context_handle);
goto fence_context_create_fence_failed;
}
fence = __nv_drm_prime_fence_context_create_fence(
to_prime_fence_context(nv_gem_fence_context->nv_fence_context),
p->sem_thresh);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
NV_DRM_DEV_LOG_ERR(
@@ -499,18 +567,23 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
goto fence_context_create_fence_failed;
}
nv_dma_resv_lock(&nv_gem->resv, NULL);
resv = nv_drm_gem_res_obj(nv_gem);
ret = nv_dma_resv_reserve_fences(&nv_gem->resv, 1, false);
nv_dma_resv_lock(resv, NULL);
ret = nv_dma_resv_reserve_fences(resv, 1, false);
if (ret == 0) {
nv_dma_resv_add_excl_fence(&nv_gem->resv, fence);
nv_dma_resv_add_excl_fence(resv, fence);
} else {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to reserve fence. Error code: %d", ret);
}
nv_dma_resv_unlock(&nv_gem->resv);
nv_dma_resv_unlock(resv);
/* dma_resv_add_excl_fence takes its own reference to the fence. */
nv_dma_fence_put(fence);
fence_context_create_fence_failed:
nv_drm_gem_object_unreference_unlocked(&nv_gem_fence_context->base);


@@ -35,11 +35,11 @@ struct drm_device;
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
#endif /* NV_DRM_FENCE_AVAILABLE */


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
#if defined(DRM_FORMAT_ABGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
#endif
#if defined(DRM_FORMAT_XBGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
#endif
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,


@@ -95,7 +95,7 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
pfn >>= PAGE_SHIFT;
pfn += page_offset;
} else {
BUG_ON(page_offset > nv_nvkms_memory->pages_count);
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
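/*
 * page_offset is a zero-based index into pages[], so it must be strictly
 * less than pages_count; the previous '>' check permitted an off-by-one
 * access.
 */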
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
}
@@ -131,11 +131,11 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
const struct nv_drm_gem_object *nv_gem_src);
static int __nv_drm_gem_nvkms_map(
struct nv_drm_device *nv_dev,
struct NvKmsKapiMemory *pMemory,
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory,
uint64_t size)
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
{
struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
if (!nv_dev->hasVideoMemory) {
return 0;
}
@@ -153,7 +153,7 @@ static int __nv_drm_gem_nvkms_map(
nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
(uintptr_t)nv_nvkms_memory->pPhysicalAddress,
size);
nv_nvkms_memory->base.base.size);
if (!nv_nvkms_memory->pWriteCombinedIORemapAddress) {
NV_DRM_DEV_LOG_INFO(
@@ -167,6 +167,22 @@ static int __nv_drm_gem_nvkms_map(
return 0;
}
static void *__nv_drm_gem_nvkms_prime_vmap(
struct nv_drm_gem_object *nv_gem)
{
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
to_nv_nvkms_memory(nv_gem);
if (!nv_nvkms_memory->physically_mapped) {
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
return ERR_PTR(ret);
}
}
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
}
static int __nv_drm_gem_map_nvkms_memory_offset(
struct nv_drm_device *nv_dev,
struct nv_drm_gem_object *nv_gem,
@@ -176,10 +192,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
to_nv_nvkms_memory(nv_gem);
if (!nv_nvkms_memory->physically_mapped) {
int ret = __nv_drm_gem_nvkms_map(nv_dev,
nv_nvkms_memory->base.pMemory,
nv_nvkms_memory,
nv_nvkms_memory->base.base.size);
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
return ret;
}
@@ -201,7 +214,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
nv_dev,
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
nv_gem->pMemory);
return NULL;
return ERR_PTR(-ENOMEM);
}
sg_table = nv_drm_prime_pages_to_sg(nv_dev->dev,
@@ -214,6 +227,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
.free = __nv_drm_gem_nvkms_memory_free,
.prime_dup = __nv_drm_gem_nvkms_prime_dup,
.prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
.mmap = __nv_drm_gem_nvkms_mmap,
.handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
.create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
@@ -229,6 +243,15 @@ static int __nv_drm_nvkms_gem_obj_init(
NvU64 *pages = NULL;
NvU32 numPages = 0;
if ((size % PAGE_SIZE) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"NvKmsKapiMemory 0x%p size should be in a multiple of page size to "
"create a gem object",
pMemory);
return -EINVAL;
}
nv_nvkms_memory->pPhysicalAddress = NULL;
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;
@@ -314,7 +337,7 @@ int nv_drm_dumb_create(
* to use dumb buffers for software rendering, so they're not much use
* without a CPU mapping.
*/
ret = __nv_drm_gem_nvkms_map(nv_dev, pMemory, nv_nvkms_memory, args->size);
ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
nv_drm_gem_object_unreference_unlocked(&nv_nvkms_memory->base);
goto fail;
@@ -583,11 +606,13 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
return ret;
}
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
int nv_drm_dumb_destroy(struct drm_file *file,
struct drm_device *dev,
uint32_t handle)
{
return drm_gem_handle_delete(file, handle);
}
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
#endif


@@ -97,9 +97,11 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
struct drm_device *dev, uint32_t handle,
uint64_t *offset);
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
int nv_drm_dumb_destroy(struct drm_file *file,
struct drm_device *dev,
uint32_t handle);
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
struct drm_gem_object *nv_drm_gem_nvkms_prime_import(
struct drm_device *dev,


@@ -92,9 +92,9 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
return -EINVAL;
}
vma->vm_flags &= ~VM_PFNMAP;
vma->vm_flags &= ~VM_IO;
vma->vm_flags |= VM_MIXEDMAP;
nv_vm_flags_clear(vma, VM_PFNMAP);
nv_vm_flags_clear(vma, VM_IO);
nv_vm_flags_set(vma, VM_MIXEDMAP);
return 0;
}
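/*
 * Note on the change above: since Linux v6.3, vma->vm_flags may no longer
 * be written directly and must be modified through vm_flags_set()/
 * vm_flags_clear(); the nv_vm_flags_set()/nv_vm_flags_clear() wrappers
 * presumably fall back to plain assignment on older kernels (see the
 * vm_area_struct_has_const_vm_flags conftest in the Kbuild below).
 */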
@@ -112,8 +112,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
BUG_ON(page_offset > nv_user_memory->pages_count);
BUG_ON(page_offset >= nv_user_memory->pages_count);
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
switch (ret) {
case 0:


@@ -26,7 +26,7 @@
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-prime-fence.h"
#include "nvidia-drm-fence.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-gem-user-memory.h"
@@ -81,10 +81,13 @@ typedef struct dma_buf_map nv_sysio_map_t;
static int nv_drm_gem_vmap(struct drm_gem_object *gem,
nv_sysio_map_t *map)
{
map->vaddr = nv_drm_gem_prime_vmap(gem);
if (map->vaddr == NULL) {
void *vaddr = nv_drm_gem_prime_vmap(gem);
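/*
 * nv_drm_gem_prime_vmap() can now return an ERR_PTR (e.g. from the lazy
 * NVKMS mapping path) in addition to the historical NULL; propagate the
 * two failure cases distinctly.
 */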
if (vaddr == NULL) {
return -ENOMEM;
} else if (IS_ERR(vaddr)) {
return PTR_ERR(vaddr);
}
map->vaddr = vaddr;
map->is_iomem = true;
return 0;
}
@@ -132,13 +135,8 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
/* Initialize the gem object */
#if defined(NV_DRM_FENCE_AVAILABLE)
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_dma_resv_init(&nv_gem->resv);
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_gem->base.resv = &nv_gem->resv;
#endif
#endif
#if !defined(NV_DRM_DRIVER_HAS_GEM_FREE_OBJECT)
@@ -212,8 +210,7 @@ void nv_drm_gem_prime_vunmap(struct drm_gem_object *gem, void *address)
nv_dma_resv_t* nv_drm_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(obj);
return &nv_gem->resv;
return nv_drm_gem_res_obj(nv_gem);
}
#endif
@@ -299,7 +296,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
vma->vm_flags &= ~VM_MAYWRITE;
nv_vm_flags_clear(vma, VM_MAYWRITE);
}
#endif


@@ -45,6 +45,8 @@
#include "nvidia-dma-resv-helper.h"
#endif
#include "linux/dma-buf.h"
struct nv_drm_gem_object;
struct nv_drm_gem_object_funcs {
@@ -71,7 +73,7 @@ struct nv_drm_gem_object {
struct NvKmsKapiMemory *pMemory;
#if defined(NV_DRM_FENCE_AVAILABLE)
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_dma_resv_t resv;
#endif
};
@@ -177,6 +179,17 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
return drm_gem_handle_create(filp, &nv_gem->base, handle);
}
#if defined(NV_DRM_FENCE_AVAILABLE)
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
return nv_gem->base.resv;
#else
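/*
 * An imported dma-buf must share the exporter's reservation object so
 * that fences stay coherent across devices; otherwise use the embedded
 * resv.
 */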
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
#endif
}
#endif
void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
struct nv_drm_gem_object *nv_gem,
const struct nv_drm_gem_object_funcs * const ops,


@@ -28,6 +28,8 @@
*/
#include "nvidia-drm-helper.h"
#include "nvidia-drm-priv.h"
#include "nvidia-drm-crtc.h"
#include "nvmisc.h"
@@ -148,6 +150,18 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
goto free;
}
#if defined(NV_DRM_ROTATION_AVAILABLE)
nv_drm_for_each_plane(plane, dev) {
plane_state = drm_atomic_get_plane_state(state, plane);
if (IS_ERR(plane_state)) {
ret = PTR_ERR(plane_state);
goto free;
}
plane_state->rotation = DRM_MODE_ROTATE_0;
}
#endif
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
if (ret < 0)


@@ -35,6 +35,35 @@
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
/* For DRM_ROTATE_* , DRM_REFLECT_* */
#include <drm/drm_blend.h>
#endif
#if defined(NV_DRM_ROTATION_AVAILABLE)
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
#include <uapi/drm/drm_mode.h>
#endif
#if defined(NV_DRM_ROTATION_AVAILABLE)
/*
 * Commit c2c446ad29437bb92b157423c632286608ebd3ec (2017-05-19) added
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
* DRM_ROTATE_* and DRM_REFLECT_*
*/
#if !defined(DRM_MODE_ROTATE_0)
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
#endif
#endif //NV_DRM_ROTATION_AVAILABLE
/*
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
* (2017-09-26) and drm_dev_unref() is removed by
@@ -277,11 +306,33 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
for_each_plane_in_state(__state, plane, plane_state, __i)
#endif
static inline struct drm_crtc *nv_drm_crtc_find(struct drm_device *dev,
uint32_t id)
static inline struct drm_connector *
nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
uint32_t id)
{
#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
return drm_connector_find(dev, id);
#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
return drm_connector_lookup(dev, filep, id);
#else
return drm_connector_lookup(dev, id);
#endif
}
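/*
 * On kernels with drm_connector_lookup(), the lookup above acquires a
 * reference that the caller must balance with nv_drm_connector_put(), as
 * the grant-permissions path does.
 */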
static inline void nv_drm_connector_put(struct drm_connector *connector)
{
#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
drm_connector_put(connector);
#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
drm_connector_unreference(connector);
#endif
}
static inline struct drm_crtc *
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
{
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
return drm_crtc_find(dev, NULL /* file_priv */, id);
return drm_crtc_find(dev, filep, id);
#else
return drm_crtc_find(dev, id);
#endif
@@ -297,6 +348,30 @@ static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
#endif
}
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
#include <drm/drm_auth.h>
#endif
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
#include <drm/drm_file.h>
#endif
/*
* drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
* pointers in drm_lease.c") in v5.15 (2021-07-20)
*/
static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
{
#if defined(NV_DRM_FILE_GET_MASTER_PRESENT)
return drm_file_get_master(filep);
#else
if (filep->master) {
return drm_master_get(filep->master);
} else {
return NULL;
}
#endif
}
/*
* drm_connector_for_each_possible_encoder() is added by commit
* 83aefbb887b59df0b3520965c3701e01deacfc52 which was Signed-off-by:
@@ -507,6 +582,19 @@ static inline int nv_drm_format_num_planes(uint32_t format)
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
/*
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
* ioctls" previously did it only for drivers that set the DRM_MODESET flag), so
* it was effectively a no-op anyway.
*/
#if !defined(NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT)
#define DRM_UNLOCKED 0
#endif
/*
* drm_vma_offset_exact_lookup_locked() were added
* by kernel commit 2225cfe46bcc which was Signed-off-by:


@@ -34,8 +34,8 @@
#define DRM_NVIDIA_GEM_IMPORT_USERSPACE_MEMORY 0x02
#define DRM_NVIDIA_GET_DEV_INFO 0x03
#define DRM_NVIDIA_FENCE_SUPPORTED 0x04
#define DRM_NVIDIA_FENCE_CONTEXT_CREATE 0x05
#define DRM_NVIDIA_GEM_FENCE_ATTACH 0x06
#define DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE 0x05
#define DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH 0x06
#define DRM_NVIDIA_GET_CLIENT_CAPABILITY 0x08
#define DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY 0x09
#define DRM_NVIDIA_GEM_MAP_OFFSET 0x0a
@@ -43,6 +43,11 @@
#define DRM_NVIDIA_GET_CRTC_CRC32_V2 0x0c
#define DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY 0x0d
#define DRM_NVIDIA_GEM_IDENTIFY_OBJECT 0x0e
#define DRM_NVIDIA_DMABUF_SUPPORTED 0x0f
#define DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID 0x10
#define DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID 0x11
#define DRM_NVIDIA_GRANT_PERMISSIONS 0x12
#define DRM_NVIDIA_REVOKE_PERMISSIONS 0x13
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
@@ -65,50 +70,69 @@
#if defined(NV_LINUX)
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_DMABUF_SUPPORTED)
#else
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED 0
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED 0
#endif
#define DRM_IOCTL_NVIDIA_FENCE_CONTEXT_CREATE \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_CONTEXT_CREATE), \
struct drm_nvidia_fence_context_create_params)
#define DRM_IOCTL_NVIDIA_PRIME_FENCE_CONTEXT_CREATE \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE),\
struct drm_nvidia_prime_fence_context_create_params)
#define DRM_IOCTL_NVIDIA_GEM_FENCE_ATTACH \
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_FENCE_ATTACH), \
struct drm_nvidia_gem_fence_attach_params)
#define DRM_IOCTL_NVIDIA_GEM_PRIME_FENCE_ATTACH \
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH), \
struct drm_nvidia_gem_prime_fence_attach_params)
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
struct drm_nvidia_get_client_capability_params)
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
struct drm_nvidia_get_crtc_crc32_params)
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
struct drm_nvidia_get_crtc_crc32_v2_params)
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
struct drm_nvidia_gem_export_nvkms_memory_params)
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
struct drm_nvidia_gem_map_offset_params)
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
struct drm_nvidia_gem_alloc_nvkms_memory_params)
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
struct drm_nvidia_gem_export_dmabuf_memory_params)
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
struct drm_nvidia_gem_identify_object_params)
#define DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID),\
struct drm_nvidia_get_dpy_id_for_connector_id_params)
#define DRM_IOCTL_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID),\
struct drm_nvidia_get_connector_id_for_dpy_id_params)
#define DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GRANT_PERMISSIONS), \
struct drm_nvidia_grant_permissions_params)
#define DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_REVOKE_PERMISSIONS), \
struct drm_nvidia_revoke_permissions_params)
struct drm_nvidia_gem_import_nvkms_memory_params {
uint64_t mem_size; /* IN */
@@ -136,7 +160,7 @@ struct drm_nvidia_get_dev_info_params {
uint32_t sector_layout; /* OUT */
};
struct drm_nvidia_fence_context_create_params {
struct drm_nvidia_prime_fence_context_create_params {
uint32_t handle; /* OUT GEM handle to fence context */
uint32_t index; /* IN Index of semaphore to use for fencing */
@@ -151,7 +175,7 @@ struct drm_nvidia_fence_context_create_params {
uint64_t event_nvkms_params_size; /* IN */
};
struct drm_nvidia_gem_fence_attach_params {
struct drm_nvidia_gem_prime_fence_attach_params {
uint32_t handle; /* IN GEM handle to attach fence to */
uint32_t fence_context_handle; /* IN GEM handle to fence context on which fence is run on */
uint32_t sem_thresh; /* IN Semaphore value to reach before signal */
@@ -232,4 +256,23 @@ struct drm_nvidia_gem_identify_object_params {
drm_nvidia_gem_object_type object_type; /* OUT GEM object type */
};
struct drm_nvidia_get_dpy_id_for_connector_id_params {
uint32_t connectorId; /* IN */
uint32_t dpyId; /* OUT */
};
struct drm_nvidia_get_connector_id_for_dpy_id_params {
uint32_t dpyId; /* IN */
uint32_t connectorId; /* OUT */
};
struct drm_nvidia_grant_permissions_params {
int32_t fd; /* IN */
uint32_t dpyId; /* IN */
};
struct drm_nvidia_revoke_permissions_params {
uint32_t dpyId; /* IN */
};
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */


@@ -47,6 +47,14 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
void *nv_drm_calloc(size_t nmemb, size_t size)
{
size_t total_size = nmemb * size;
//
// Check for overflow.
//
if ((nmemb != 0) && ((total_size / nmemb) != size))
{
return NULL;
}
return kzalloc(nmemb * size, GFP_KERNEL);
}
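The division above is the standard multiplication-overflow test: if nmemb * size wrapped around, dividing the truncated product by nmemb cannot give size back. A quick worked illustration, assuming 64-bit size_t:

/* Illustration only, assuming 64-bit size_t: */
#include <stddef.h>

static void overflow_example(void)
{
    size_t nmemb = (size_t)1 << 33;
    size_t size = (size_t)1 << 33;
    size_t total = nmemb * size; /* 2^66 mod 2^64 == 0: the product wrapped */

    /* total / nmemb == 0, which != size, so nv_drm_calloc() returns NULL. */
    (void)total;
}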
@@ -93,8 +101,6 @@ int nv_drm_lock_user_pages(unsigned long address,
{
struct mm_struct *mm = current->mm;
struct page **user_pages;
const int write = 1;
const int force = 0;
int pages_pinned;
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
@@ -105,7 +111,7 @@ int nv_drm_lock_user_pages(unsigned long address,
nv_mmap_read_lock(mm);
pages_pinned = NV_GET_USER_PAGES(address, pages_count, write, force,
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
user_pages, NULL);
nv_mmap_read_unlock(mm);
@@ -123,7 +129,7 @@ failed:
int i;
for (i = 0; i < pages_pinned; i++) {
put_page(user_pages[i]);
NV_UNPIN_USER_PAGE(user_pages[i]);
}
}
@@ -138,8 +144,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
for (i = 0; i < pages_count; i++) {
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
nv_drm_free(pages);
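/*
 * Note on the migration above: pages pinned via pin_user_pages() (Linux
 * v5.6+) must be released with unpin_user_page() rather than put_page();
 * the NV_PIN_USER_PAGES/NV_UNPIN_USER_PAGE wrappers presumably fall back
 * to get_user_pages()/put_page() on older kernels (see the pin_user_pages
 * conftests added to the Kbuild below). set_page_dirty_lock() still
 * precedes the unpin because the pages were pinned with FOLL_WRITE.
 */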
@@ -174,12 +179,7 @@ static void __exit nv_linux_drm_exit(void)
module_init(nv_linux_drm_init);
module_exit(nv_linux_drm_exit);
#if defined(MODULE_LICENSE)
MODULE_LICENSE("Dual MIT/GPL");
#endif
#if defined(MODULE_INFO)
MODULE_INFO(supported, "external");
#endif
#if defined(MODULE_VERSION)
MODULE_VERSION(NV_VERSION_STRING);
#endif
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);


@@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
to_nv_crtc_state(new_crtc_state);
struct drm_plane_state *old_plane_state = NULL;
struct drm_plane *plane = NULL;
struct drm_plane *primary_plane = crtc->primary;
bool primary_event = false;
bool overlay_event = false;
int i;
if (!old_crtc_state->active && !new_crtc_state->active) {
@@ -134,16 +131,19 @@ static int __nv_drm_put_back_post_fence_fd(
const struct NvKmsKapiLayerReplyConfig *layer_reply_config)
{
int fd = layer_reply_config->postSyncptFd;
int ret = 0;
if ((fd >= 0) && (plane_state->fd_user_ptr != NULL)) {
if (put_user(fd, plane_state->fd_user_ptr)) {
return -EFAULT;
ret = copy_to_user(plane_state->fd_user_ptr, &fd, sizeof(fd));
if (ret != 0) {
return ret;
}
/*! Set back to NULL and let set_property specify it again */
plane_state->fd_user_ptr = NULL;
}
return 0;
return ret;
}
static int __nv_drm_get_syncpt_data(
@@ -274,6 +274,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
nv_new_crtc_state->nv_flip = NULL;
}
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
#endif
}
}
@@ -448,6 +451,13 @@ int nv_drm_atomic_commit(struct drm_device *dev,
#else
drm_atomic_helper_swap_state(dev, state);
#endif
/*
* Used to update legacy modeset state pointers to support UAPIs not updated
* by the core atomic modeset infrastructure.
*
* Example: /sys/class/drm/<card connector>/enabled
*/
drm_atomic_helper_update_legacy_modeset_state(dev, state);
/*
* nv_drm_atomic_commit_internal() must not return failure after


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -122,6 +122,11 @@ struct nv_drm_device {
NvBool supportsSyncpts;
struct drm_property *nv_out_fence_property;
struct drm_property *nv_input_colorspace_property;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property *nv_hdr_output_metadata_property;
#endif
struct nv_drm_device *next;
};


@@ -16,7 +16,7 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-prime-fence.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
@@ -54,16 +54,22 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl___vma_start_write
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
@@ -100,6 +106,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
@@ -115,6 +122,21 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_fb_create_takes_format_info


@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
//
// This function is never invoked when there is no NUMA preference (preferred
// node is NUMA_NO_NODE).
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
nv_kthread_q_t *q,
int preferred_node,
@@ -177,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
{
unsigned i, j;
const static unsigned attempts = 3;
static const unsigned attempts = 3;
struct task_struct *thread[3];
for (i = 0;; i++) {
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
return thread[i];
}
#endif
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
{
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
q->q_kthread = kthread_create(_main_loop, q, q_name);
}
else {
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
#else
return -ENOTSUPP;
#endif
}
if (IS_ERR(q->q_kthread)) {
@@ -307,7 +301,7 @@ static void _q_flush_function(void *args)
static void _raw_q_flush(nv_kthread_q_t *q)
{
nv_kthread_q_item_t q_item;
DECLARE_COMPLETION(completion);
DECLARE_COMPLETION_ONSTACK(completion);
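/*
 * The completion is stack-allocated and signaled from the queue thread;
 * DECLARE_COMPLETION_ONSTACK initializes it correctly for automatic
 * storage (including lockdep annotations), which plain DECLARE_COMPLETION
 * does not.
 */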
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);


@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-21 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -34,6 +34,9 @@
#include <linux/file.h>
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/freezer.h>
#include <acpi/video.h>
#include "nvstatus.h"
@@ -51,7 +54,11 @@
#include "nv-time.h"
#include "nv-lock.h"
#if !defined(CONFIG_RETPOLINE)
/*
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
* CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
*/
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
#include "nv-retpoline.h"
#endif
@@ -62,6 +69,12 @@
static bool output_rounding_fix = true;
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
static bool disable_vrr_memclk_switch = false;
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);
static bool opportunistic_display_sync = true;
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);
/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@@ -72,6 +85,15 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
/* This parameter is used to find the dpy override conf file */
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
MODULE_PARM_DESC(config_file,
"Path to the nvidia-modeset configuration file "
"(default: disabled)");
static char *nvkms_conf = NULL;
module_param_named(config_file, nvkms_conf, charp, 0400);
static atomic_t nvkms_alloc_called_count;
NvBool nvkms_output_rounding_fix(void)
@@ -79,6 +101,16 @@ NvBool nvkms_output_rounding_fix(void)
return output_rounding_fix;
}
NvBool nvkms_disable_vrr_memclk_switch(void)
{
return disable_vrr_memclk_switch;
}
NvBool nvkms_opportunistic_display_sync(void)
{
return opportunistic_display_sync;
}
#define NVKMS_SYNCPT_STUBS_NEEDED
/*************************************************************************
@@ -180,7 +212,24 @@ static inline int nvkms_read_trylock_pm_lock(void)
static inline void nvkms_read_lock_pm_lock(void)
{
down_read(&nvkms_pm_lock);
if ((current->flags & PF_NOFREEZE)) {
/*
* Non-freezable tasks (i.e. kthreads in this case) don't have to worry
* about being frozen during system suspend, but do need to block so
* that the CPU can go idle during s2idle. Do a normal uninterruptible
* blocking wait for the PM lock.
*/
down_read(&nvkms_pm_lock);
} else {
/*
* For freezable tasks, make sure we give the kernel an opportunity to
* freeze if taking the PM lock fails.
*/
while (!down_read_trylock(&nvkms_pm_lock)) {
try_to_freeze();
cond_resched();
}
}
}
static inline void nvkms_read_unlock_pm_lock(void)
@@ -603,7 +652,11 @@ static void nvkms_kthread_q_callback(void *arg)
* pending timers and then waiting for workqueue callbacks.
*/
if (timer->kernel_timer_created) {
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
timer_delete_sync(&timer->kernel_timer);
#else
del_timer_sync(&timer->kernel_timer);
#endif
}
/*
@@ -956,6 +1009,17 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
struct nvkms_backlight_device *nvkms_bd = NULL;
int i;
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
if (!acpi_video_backlight_use_native()) {
#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT)
nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX,
"ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight.");
acpi_video_register_backlight();
#endif
return NULL;
}
#endif
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
if (gpu_info == NULL) {
return NULL;
@@ -1026,7 +1090,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
nvKmsKapiHandleEventQueueChange(device);
}
struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
struct NvKmsKapiDevice *device,
int *status)
{
@@ -1078,7 +1142,7 @@ failed:
return NULL;
}
void nvkms_close_common(struct nvkms_per_open *popen)
static void nvkms_close_pm_locked(struct nvkms_per_open *popen)
{
/*
* Don't use down_interruptible(): we need to free resources
@@ -1116,13 +1180,13 @@ void nvkms_close_common(struct nvkms_per_open *popen)
nvkms_free(popen, sizeof(*popen));
}
static void nvkms_close_deferred(void *data)
static void nvkms_close_pm_unlocked(void *data)
{
struct nvkms_per_open *popen = data;
nvkms_read_lock_pm_lock();
nvkms_close_common(popen);
nvkms_close_pm_locked(popen);
nvkms_read_unlock_pm_lock();
}
@@ -1130,18 +1194,18 @@ static void nvkms_close_deferred(void *data)
static void nvkms_close_popen(struct nvkms_per_open *popen)
{
if (nvkms_read_trylock_pm_lock() == 0) {
nvkms_close_common(popen);
nvkms_close_pm_locked(popen);
nvkms_read_unlock_pm_lock();
} else {
nv_kthread_q_item_init(&popen->deferred_close_q_item,
nvkms_close_deferred,
nvkms_close_pm_unlocked,
popen);
nvkms_queue_work(&nvkms_deferred_close_kthread_q,
&popen->deferred_close_q_item);
}
}
int nvkms_ioctl_common
static int nvkms_ioctl_common
(
struct nvkms_per_open *popen,
NvU32 cmd, NvU64 address, const size_t size
@@ -1187,7 +1251,7 @@ struct nvkms_per_open* nvkms_open_from_kapi
void nvkms_close_from_kapi(struct nvkms_per_open *popen)
{
nvkms_close_popen(popen);
nvkms_close_pm_unlocked(popen);
}
NvBool nvkms_ioctl_from_kapi
@@ -1346,30 +1410,119 @@ static void nvkms_proc_exit(void)
return;
}
#if defined(NV_PROC_REMOVE_PRESENT)
proc_remove(nvkms_proc_dir);
#else
/*
* On kernel versions without proc_remove(), we need to explicitly
* remove each proc file beneath nvkms_proc_dir.
* nvkms_proc_init() only creates files directly under
* nvkms_proc_dir, so those are the only files we need to remove
* here: warn if there is any deeper directory nesting.
*/
{
struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
while (entry != NULL) {
struct proc_dir_entry *next = entry->next;
WARN_ON(entry->subdir != NULL);
remove_proc_entry(entry->name, entry->parent);
entry = next;
}
}
remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
#endif /* NV_PROC_REMOVE_PRESENT */
#endif /* CONFIG_PROC_FS */
}
/*************************************************************************
* NVKMS Config File Read
************************************************************************/
static NvBool nvkms_fs_mounted(void)
{
return current->fs != NULL;
}
static size_t nvkms_config_file_open
(
char *fname,
char ** const buff
)
{
int i = 0;
struct file *file;
struct inode *file_inode;
size_t file_size = 0;
size_t read_size = 0;
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
loff_t pos = 0;
#endif
if (!nvkms_fs_mounted()) {
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
return 0;
}
file = filp_open(fname, O_RDONLY, 0);
if (file == NULL || IS_ERR(file)) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to open %s\n",
fname);
return 0;
}
file_inode = file->f_inode;
if (file_inode == NULL || IS_ERR(file_inode)) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Inode is invalid\n");
goto done;
}
file_size = file_inode->i_size;
if (file_size > NVKMS_READ_FILE_MAX_SIZE) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: File exceeds maximum size\n");
goto done;
}
*buff = nvkms_alloc(file_size, NV_FALSE);
if (*buff == NULL) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
goto done;
}
/*
* TODO: Once we have access to GPL symbols, this can be replaced with
* kernel_read_file for kernels >= 4.6
*/
while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
ssize_t ret = kernel_read(file, *buff + read_size,
file_size - read_size, &pos);
#else
ssize_t ret = kernel_read(file, read_size,
*buff + read_size,
file_size - read_size);
#endif
if (ret <= 0) {
break;
}
read_size += ret;
}
if (read_size != file_size) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to read %s\n",
fname);
goto done;
}
filp_close(file, current->files);
return file_size;
done:
nvkms_free(*buff, file_size);
filp_close(file, current->files);
return 0;
}
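A rough sketch of what the TODO above points at, assuming the kernel_read_file_from_path() signature from Linux 5.10+ (a GPL-only export; the function name below is hypothetical). That helper vmalloc()s the buffer itself, so the caller would free it with vfree() rather than nvkms_free(), which would also change the read-file callback contract:

#include <linux/kernel_read_file.h>
#include <linux/vmalloc.h>

static size_t nvkms_config_file_open_sketch(char *fname, char ** const buff)
{
    void *buf = NULL;
    size_t file_size = 0;
    ssize_t nread;

    /* Reads at most NVKMS_READ_FILE_MAX_SIZE bytes and allocates *buf. */
    nread = kernel_read_file_from_path(fname, 0, &buf,
                                       NVKMS_READ_FILE_MAX_SIZE,
                                       &file_size, READING_UNKNOWN);
    if (nread < 0) {
        return 0; /* mirror the 0-on-failure convention used above */
    }

    *buff = buf;
    return (size_t)nread;
}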
/* must be called with nvkms_lock locked */
static void nvkms_read_config_file_locked(void)
{
char *buffer = NULL;
size_t buf_size = 0;
/* only read the config file if the kernel parameter is set */
if (!NVKMS_CONF_FILE_SPECIFIED) {
return;
}
buf_size = nvkms_config_file_open(nvkms_conf, &buffer);
if (buf_size == 0) {
return;
}
if (nvKmsReadConf(buffer, buf_size, nvkms_config_file_open)) {
printk(KERN_INFO NVKMS_LOG_PREFIX "Successfully read %s\n",
nvkms_conf);
}
nvkms_free(buffer, buf_size);
}
/*************************************************************************
@@ -1543,10 +1696,12 @@ static int __init nvkms_init(void)
if (!nvKmsModuleLoad()) {
ret = -ENOMEM;
}
up(&nvkms_lock);
if (ret != 0) {
up(&nvkms_lock);
goto fail_module_load;
}
nvkms_read_config_file_locked();
up(&nvkms_lock);
nvkms_proc_init();
@@ -1592,7 +1747,11 @@ restart:
* completion, and we wait for queue completion with
* nv_kthread_q_stop below.
*/
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
if (timer_delete_sync(&timer->kernel_timer) == 1) {
#else
if (del_timer_sync(&timer->kernel_timer) == 1) {
#endif
/* We've deactivated timer so we need to clean after it */
list_del(&timer->timers_list);
@@ -1630,12 +1789,7 @@ restart:
module_init(nvkms_init);
module_exit(nvkms_exit);
#if defined(MODULE_LICENSE)
MODULE_LICENSE("Dual MIT/GPL");
#endif
#if defined(MODULE_INFO)
MODULE_INFO(supported, "external");
#endif
#if defined(MODULE_VERSION)
MODULE_VERSION(NV_VERSION_STRING);
#endif
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);

View File

@@ -40,17 +40,31 @@
#include "nv_stdarg.h"
enum NvKmsSyncPtOp {
/*
* Call into Tegra's kernel nvhost driver, and allocate a syncpoint that can
* be exclusively used by the caller. Internally, this operation will call
* get() to set the initial refcount of the syncpoint to 1.
*/
NVKMS_SYNCPT_OP_ALLOC,
NVKMS_SYNCPT_OP_GET,
/*
* Decrease the refcount of an already allocated syncpoint. Once the
* refcount drops to 0, the syncpoint will be returned to the free pool that
* nvhost manages, so PUT can also be used to balance out an ALLOC.
*/
NVKMS_SYNCPT_OP_PUT,
NVKMS_SYNCPT_OP_INCR_MAX,
NVKMS_SYNCPT_OP_CPU_INCR,
/*
* Extract syncpt id and thresh from the sync-file file descriptor
*/
NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
/*
* Create a dma-fence from the syncpt id and thresh value, and create a
* sync_file file descriptor for the resulting dma-fence handle.
*/
NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
/*
* Read the minimum value of the given syncpt
*/
NVKMS_SYNCPT_OP_READ_MINVAL,
NVKMS_SYNCPT_OP_READ_MAXVAL,
NVKMS_SYNCPT_OP_SET_MIN_EQ_MAX,
NVKMS_SYNCPT_OP_SET_MAXVAL,
};
typedef struct {
@@ -60,24 +74,10 @@ typedef struct {
NvU32 id; /* out */
} alloc;
struct {
NvU32 id; /* in */
} get;
struct {
NvU32 id; /* in */
} put;
struct {
NvU32 id; /* in */
NvU32 incr; /* in */
NvU32 value; /* out */
} incr_max;
struct {
NvU32 id; /* in */
} cpu_incr;
struct {
NvS32 fd; /* in */
NvU32 id; /* out */
@@ -94,24 +94,13 @@ typedef struct {
NvU32 id; /* in */
NvU32 minval; /* out */
} read_minval;
struct {
NvU32 id; /* in */
NvU32 maxval; /* out */
} read_maxval;
struct {
NvU32 id; /* in */
} set_min_eq_max;
struct {
NvU32 id; /* in */
NvU32 val; /* in */
} set_maxval;
} NvKmsSyncPtOpParams;
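For illustration, a minimal caller sketch. The dispatch function nvkms_syncpt_op(), the params member name fd_to_id_and_thresh, and its thresh field are assumed names here (only the fd and id fields are visible in this hunk):

NvKmsSyncPtOpParams params = { };
NvU32 id, thresh;

params.fd_to_id_and_thresh.fd = sync_file_fd;               /* in  */
if (nvkms_syncpt_op(NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH, &params)) {
    id     = params.fd_to_id_and_thresh.id;                 /* out */
    thresh = params.fd_to_id_and_thresh.thresh;             /* out */
}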
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_vrr_memclk_switch(void);
NvBool nvkms_opportunistic_display_sync(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,
NvBool zero);

View File

@@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o
quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $< $@
targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)
$(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE
@@ -85,15 +82,13 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync

View File

@@ -42,6 +42,20 @@ typedef void nvkms_procfs_proc_t(void *data,
char *buffer, size_t size,
nvkms_procfs_out_string_func_t *outString);
/* max number of loops to prevent hanging the kernel if an edge case is hit */
#define NVKMS_READ_FILE_MAX_LOOPS 1000
/* max size for any file read by the config system */
#define NVKMS_READ_FILE_MAX_SIZE 8192
/*
* The read file callback should allocate a buffer pointed to by *buff, fill it
* with the contents of fname, and return the size of the buffer. Buffer is not
* guaranteed to be null-terminated. The caller is responsible for freeing the
* buffer with nvkms_free, not nvFree.
*/
typedef size_t nvkms_config_read_file_func_t(char *fname,
char ** const buff);
typedef struct {
const char *name;
nvkms_procfs_proc_t *func;
@@ -74,6 +88,9 @@ void nvKmsResume(NvU32 gpuId);
void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles);
NvBool nvKmsReadConf(const char *buff, size_t size,
nvkms_config_read_file_func_t readfile);
void nvKmsKapiHandleEventQueueChange
(
struct NvKmsKapiDevice *device

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2011-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -94,11 +94,10 @@ struct nvidia_p2p_params {
} nvidia_p2p_params_t;
/*
* Capability flag for users to detect
* Macro for users to detect
* driver support for persistent pages.
*/
extern int nvidia_p2p_cap_persistent_pages;
#define NVIDIA_P2P_CAP_PERSISTENT_PAGES
#define NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
/*
* This API is not supported.
@@ -173,11 +172,6 @@ struct nvidia_p2p_page_table {
* A pointer to the function to be invoked when the pages
* underlying the virtual address range are freed
* implicitly.
* If NULL, persistent pages will be returned.
* This means the pages underlying the range of GPU virtual memory
* will persist until explicitly freed by nvidia_p2p_put_pages().
* Persistent GPU memory mappings are not supported on PowerPC,
* MIG-enabled devices and vGPU.
* @param[in] data
* A non-NULL opaque pointer to private data to be passed to the
* callback function.
@@ -190,12 +184,48 @@ struct nvidia_p2p_page_table {
* insufficient resources were available to complete the operation.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
uint64_t virtual_address,
int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
uint64_t virtual_address, uint64_t length,
struct nvidia_p2p_page_table **page_table,
void (*free_callback)(void *data), void *data);
/*
* @brief
* Pin and make the pages underlying a range of GPU virtual memory
* accessible to a third-party device. The pages will persist until
* explicitly freed by nvidia_p2p_put_pages_persistent().
*
* Persistent GPU memory mappings are not supported on PowerPC,
* MIG-enabled devices and vGPU.
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
*
* This API may sleep.
*
* @param[in] virtual_address
* The start address in the specified virtual address space.
* Address must be aligned to the 64KB boundary.
* @param[in] length
* The length of the requested P2P mapping.
* Length must be a multiple of 64KB.
* @param[out] page_table
* A pointer to an array of structures with P2P PTEs.
* @param[in] flags
* Must be set to zero for now.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
* -ENOTSUPP if the requested operation is not supported.
* -ENOMEM if the driver failed to allocate memory or if
* insufficient resources were available to complete the operation.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_get_pages_persistent(uint64_t virtual_address,
uint64_t length,
struct nvidia_p2p_page_table **page_table,
uint32_t flags);
#define NVIDIA_P2P_DMA_MAPPING_VERSION 0x00020003
@@ -268,6 +298,8 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
* Release a set of pages previously made accessible to
* a third-party device.
*
* This API may sleep.
*
* @param[in] p2p_token
* A token that uniquely identifies the P2P mapping.
* @param[in] va_space
@@ -282,10 +314,33 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
* -EINVAL if an invalid argument was supplied.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
uint64_t virtual_address,
int nvidia_p2p_put_pages(uint64_t p2p_token,
uint32_t va_space, uint64_t virtual_address,
struct nvidia_p2p_page_table *page_table);
/*
* @brief
* Release a set of persistent pages previously made accessible to
* a third-party device.
*
* This API may sleep.
*
* @param[in] virtual_address
* The start address in the specified virtual address space.
* @param[in] page_table
* A pointer to the array of structures with P2P PTEs.
* @param[in] flags
* Must be set to zero for now.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_put_pages_persistent(uint64_t virtual_address,
struct nvidia_p2p_page_table *page_table,
uint32_t flags);
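For illustration, a minimal sketch pairing the two persistent calls (the helper name pin_range_for_peer is hypothetical, and va/len are assumed to satisfy the 64KB alignment and length rules documented above):

static int pin_range_for_peer(uint64_t va, uint64_t len)
{
    struct nvidia_p2p_page_table *pt = NULL;
    /* Pin the GPU-resident range; pages persist until put_pages_persistent(). */
    int ret = nvidia_p2p_get_pages_persistent(va, len, &pt, 0);

    if (ret != 0) {
        return ret;
    }

    /* Program the peer device using pt->pages[i]->physical_address here. */

    /* Release the pinning; flags must be zero for now. */
    return nvidia_p2p_put_pages_persistent(va, pt, 0);
}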
/*
* @brief
* Free a third-party P2P page table. (This function is a no-op.)

View File

@@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
# MOFED's Module.symvers is needed for the build
# to find the additional ib_* symbols.
#
# Also, MOFED doesn't use kbuild ARCH names,
# so adapt OFA_ARCH to match MOFED's conventions.
#
ifeq ($(ARCH), arm64)
OFA_ARCH := aarch64
else ifeq ($(ARCH), powerpc)
OFA_ARCH := ppc64le
else
OFA_ARCH := $(ARCH)
endif
OFA_DIR := /usr/src/ofa_kernel
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
if [ -d "$$d" ]; then \
echo "$$d"; \

View File

@@ -1,20 +1,25 @@
/* SPDX-License-Identifier: Linux-OpenIB */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
@@ -43,7 +48,9 @@
MODULE_AUTHOR("Yishai Hadas");
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
MODULE_LICENSE("Linux-OpenIB");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
enum {
NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,
@@ -53,7 +60,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
module_param(peerdirect_support, int, S_IRUGO);
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
#ifdef NV_MEM_DEBUG
#define peer_trace(FMT, ARGS...) printk(KERN_DEBUG "nvidia-peermem" " %s:%d TRACE " FMT, __FUNCTION__, __LINE__, ## ARGS)
#else
#define peer_trace(FMT, ARGS...) do {} while (0)
#endif
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
@@ -74,7 +87,10 @@ invalidate_peer_memory mem_invalidate_callback;
static void *reg_handle = NULL;
static void *reg_handle_nc = NULL;
#define NV_MEM_CONTEXT_MAGIC ((u64)0xF1F4F1D0FEF0DAD0ULL)
struct nv_mem_context {
u64 pad1;
struct nvidia_p2p_page_table *page_table;
struct nvidia_p2p_dma_mapping *dma_mapping;
u64 core_context;
@@ -86,8 +102,22 @@ struct nv_mem_context {
struct task_struct *callback_task;
int sg_allocated;
struct sg_table sg_head;
u64 pad2;
};
#define NV_MEM_CONTEXT_CHECK_OK(MC) ({ \
struct nv_mem_context *mc = (MC); \
int rc = ((0 != mc) && \
(READ_ONCE(mc->pad1) == NV_MEM_CONTEXT_MAGIC) && \
(READ_ONCE(mc->pad2) == NV_MEM_CONTEXT_MAGIC)); \
if (!rc) { \
peer_trace("invalid nv_mem_context=%px pad1=%016llx pad2=%016llx\n", \
mc, \
mc?mc->pad1:0, \
mc?mc->pad2:0); \
} \
rc; \
})
static void nv_get_p2p_free_callback(void *data)
{
@@ -97,8 +127,9 @@ static void nv_get_p2p_free_callback(void *data)
struct nvidia_p2p_dma_mapping *dma_mapping = NULL;
__module_get(THIS_MODULE);
if (!nv_mem_context) {
peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
if (!NV_MEM_CONTEXT_CHECK_OK(nv_mem_context)) {
peer_err("detected invalid context, skipping further processing\n");
goto out;
}
@@ -169,9 +200,11 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
/* Error case handled as not mine */
return 0;
nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
@@ -195,6 +228,7 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
return 1;
err:
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
/* Error case handled as not mine */
@@ -284,8 +318,9 @@ out:
return 0;
}
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
static void nv_mem_put_pages_common(int nc,
struct sg_table *sg_head,
void *context)
{
int ret = 0;
struct nv_mem_context *nv_mem_context =
@@ -302,8 +337,13 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
if (nv_mem_context->callback_task == current)
return;
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
nv_mem_context->page_table);
if (nc) {
ret = nvidia_p2p_put_pages_persistent(nv_mem_context->page_virt_start,
nv_mem_context->page_table, 0);
} else {
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
nv_mem_context->page_table);
}
#ifdef _DEBUG_ONLY_
/* Here we expect an error in real-life cases that should be ignored, not printed.
@@ -318,6 +358,16 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
return;
}
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
{
nv_mem_put_pages_common(0, sg_head, context);
}
static void nv_mem_put_pages_nc(struct sg_table *sg_head, void *context)
{
nv_mem_put_pages_common(1, sg_head, context);
}
static void nv_mem_release(void *context)
{
struct nv_mem_context *nv_mem_context =
@@ -326,6 +376,7 @@ static void nv_mem_release(void *context)
sg_free_table(&nv_mem_context->sg_head);
nv_mem_context->sg_allocated = 0;
}
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
module_put(THIS_MODULE);
return;
@@ -396,8 +447,9 @@ static int nv_mem_get_pages_nc(unsigned long addr,
nv_mem_context->core_context = core_context;
nv_mem_context->page_size = GPU_PAGE_SIZE;
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, NULL, NULL);
ret = nvidia_p2p_get_pages_persistent(nv_mem_context->page_virt_start,
nv_mem_context->mapped_size,
&nv_mem_context->page_table, 0);
if (ret < 0) {
peer_err("error %d while calling nvidia_p2p_get_pages() with NULL callback\n", ret);
return ret;
@@ -407,13 +459,13 @@ static int nv_mem_get_pages_nc(unsigned long addr,
}
static struct peer_memory_client nv_mem_client_nc = {
.acquire = nv_mem_acquire,
.get_pages = nv_mem_get_pages_nc,
.dma_map = nv_dma_map,
.dma_unmap = nv_dma_unmap,
.put_pages = nv_mem_put_pages,
.get_page_size = nv_mem_get_page_size,
.release = nv_mem_release,
.acquire = nv_mem_acquire,
.get_pages = nv_mem_get_pages_nc,
.dma_map = nv_dma_map,
.dma_unmap = nv_dma_unmap,
.put_pages = nv_mem_put_pages_nc,
.get_page_size = nv_mem_get_page_size,
.release = nv_mem_release,
};
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
@@ -477,9 +529,6 @@ static int __init nv_mem_client_init(void)
}
// The nc client enables support for persistent pages.
// Thanks to this check, nvidia-peermem requires the new symbol from nvidia.ko, which
// prevents users from unintentionally loading this module with an unsupported nvidia.ko.
BUG_ON(!nvidia_p2p_cap_persistent_pages);
strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
strcpy(nv_mem_client_nc.version, DRV_VERSION);
reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);

View File

@@ -1,29 +1,33 @@
/*******************************************************************************
Copyright (c) 2013 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __cla06fsubch_h__
#define __cla06fsubch_h__
#ifndef _cla06fsubch_h_
#define _cla06fsubch_h_
#define NVA06F_SUBCHANNEL_2D 3
#define NVA06F_SUBCHANNEL_3D 0
#define NVA06F_SUBCHANNEL_COMPUTE 1
#define NVA06F_SUBCHANNEL_COPY_ENGINE 4
#define NVA06F_SUBCHANNEL_I2M 2
#endif // {__cla06fsubch_h__}
#endif // _cla06fsubch_h_

View File

@@ -1,25 +1,25 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _cla16f_h_
#define _cla16f_h_
@@ -30,9 +30,48 @@ extern "C" {
#include "nvtypes.h"
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
/* class KEPLER_CHANNEL_GPFIFO */
/*
* Documentation for KEPLER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
*/
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
/* pio method data structure */
typedef volatile struct _cla16f_tag0 {
NvV32 Reserved00[0x7c0];
} NvA16FTypedef, KEPLER_ChannelGPFifoB;
#define NVA16F_TYPEDEF KEPLER_CHANNELChannelGPFifo
/* dma flow control data structure */
typedef volatile struct _cla16f_tag1 {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c];
} NvA16FControl, KeplerBControlGPFifo;
/* fields and values */
#define NVA16F_NUMBER_OF_SUBCHANNELS (8)
#define NVA16F_SET_OBJECT (0x00000000)
#define NVA16F_SET_OBJECT_NVCLASS 15:0
#define NVA16F_SET_OBJECT_ENGINE 20:16
#define NVA16F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVA16F_ILLEGAL (0x00000004)
#define NVA16F_ILLEGAL_HANDLE 31:0
#define NVA16F_NOP (0x00000008)
#define NVA16F_NOP_HANDLE 31:0
#define NVA16F_SEMAPHOREA (0x00000010)
@@ -100,6 +139,12 @@ extern "C" {
#define NVA16F_SET_REFERENCE_COUNT 31:0
#define NVA16F_WFI (0x00000078)
#define NVA16F_WFI_HANDLE 31:0
#define NVA16F_CRC_CHECK (0x0000007c)
#define NVA16F_CRC_CHECK_VALUE 31:0
#define NVA16F_YIELD (0x00000080)
#define NVA16F_YIELD_OP 1:0
#define NVA16F_YIELD_OP_NOP 0x00000000
/* GPFIFO entry format */
#define NVA16F_GP_ENTRY__SIZE 8
@@ -126,13 +171,28 @@ extern "C" {
#define NVA16F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
/* dma method formats */
#define NVA16F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVA16F_DMA_METHOD_ADDRESS 11:0
#define NVA16F_DMA_SUBDEVICE_MASK 15:4
#define NVA16F_DMA_METHOD_SUBCHANNEL 15:13
#define NVA16F_DMA_TERT_OP 17:16
#define NVA16F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVA16F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVA16F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVA16F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVA16F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVA16F_DMA_METHOD_COUNT_OLD 28:18
#define NVA16F_DMA_METHOD_COUNT 28:16
#define NVA16F_DMA_IMMD_DATA 28:16
#define NVA16F_DMA_SEC_OP 31:29
#define NVA16F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVA16F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVA16F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVA16F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVA16F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVA16F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVA16F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVA16F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVA16F_DMA_INCR_ADDRESS 11:0
#define NVA16F_DMA_INCR_SUBCHANNEL 15:13
@@ -140,7 +200,6 @@ extern "C" {
#define NVA16F_DMA_INCR_OPCODE 31:29
#define NVA16F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVA16F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVA16F_DMA_NONINCR_ADDRESS 11:0
#define NVA16F_DMA_NONINCR_SUBCHANNEL 15:13
@@ -148,13 +207,45 @@ extern "C" {
#define NVA16F_DMA_NONINCR_OPCODE 31:29
#define NVA16F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVA16F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVA16F_DMA_ONEINCR_ADDRESS 11:0
#define NVA16F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVA16F_DMA_ONEINCR_COUNT 28:16
#define NVA16F_DMA_ONEINCR_OPCODE 31:29
#define NVA16F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVA16F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVA16F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVA16F_DMA_IMMD_ADDRESS 11:0
#define NVA16F_DMA_IMMD_SUBCHANNEL 15:13
#define NVA16F_DMA_IMMD_DATA 28:16
#define NVA16F_DMA_IMMD_OPCODE 31:29
#define NVA16F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVA16F_DMA_ENDSEG_OPCODE 31:29
#define NVA16F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVA16F_DMA_ADDRESS 12:2
#define NVA16F_DMA_SUBCH 15:13
#define NVA16F_DMA_OPCODE3 17:16
#define NVA16F_DMA_OPCODE3_NONE (0x00000000)
#define NVA16F_DMA_COUNT 28:18
#define NVA16F_DMA_OPCODE 31:29
#define NVA16F_DMA_OPCODE_METHOD (0x00000000)
#define NVA16F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVA16F_DMA_DATA 31:0
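As a worked example of the H:L bit-range notation used throughout this header, here is an illustrative packer (not part of the class definition) for one incrementing-method header word; the 12-bit address field is dword-granular (byte offset >> 2), consistent with the legacy ADDRESS_OLD 12:2 field above:

/* Illustrative only: pack NVA16F_DMA_INCR_* fields into a method header. */
static inline NvU32 nva16f_incr_header(NvU32 method_byte_offset,
                                       NvU32 subch, NvU32 count_dwords)
{
    return (NVA16F_DMA_INCR_OPCODE_VALUE << 29)    | /* OPCODE     31:29 */
           ((count_dwords & 0x1fff)      << 16)    | /* COUNT      28:16 */
           ((subch        & 0x7)         << 13)    | /* SUBCHANNEL 15:13 */
           ((method_byte_offset >> 2)    & 0xfff);   /* ADDRESS    11:0  */
}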
#ifdef __cplusplus
}; /* extern "C" */

View File

@@ -1,24 +1,26 @@
/*******************************************************************************
Copyright (c) 2014 NVidia Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clb069_h_
#define _clb069_h_

View File

@@ -1,28 +1,28 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clB06f_h_
#define _clB06f_h_
#ifndef _clb06f_h_
#define _clb06f_h_
#ifdef __cplusplus
extern "C" {
@@ -30,10 +30,46 @@ extern "C" {
#include "nvtypes.h"
/* class MAXWELL_CHANNEL_GPFIFO */
/*
* Documentation for MAXWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
*/
#define MAXWELL_CHANNEL_GPFIFO_A (0x0000B06F)
/* class MAXWELL_CHANNEL_GPFIFO */
#define NVB06F_TYPEDEF MAXWELL_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct _clb06f_tag0 {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c];
} Nvb06FControl, MaxwellAControlGPFifo;
/* fields and values */
#define NVB06F_NUMBER_OF_SUBCHANNELS (8)
#define NVB06F_SET_OBJECT (0x00000000)
#define NVB06F_SET_OBJECT_NVCLASS 15:0
#define NVB06F_SET_OBJECT_ENGINE 20:16
#define NVB06F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVB06F_ILLEGAL (0x00000004)
#define NVB06F_ILLEGAL_HANDLE 31:0
#define NVB06F_NOP (0x00000008)
#define NVB06F_NOP_HANDLE 31:0
#define NVB06F_SEMAPHOREA (0x00000010)
@@ -47,6 +83,8 @@ extern "C" {
#define NVB06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVB06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVB06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVB06F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVB06F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
@@ -56,8 +94,22 @@ extern "C" {
#define NVB06F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVB06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVB06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVB06F_SEMAPHORED_REDUCTION 30:27
#define NVB06F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVB06F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVB06F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVB06F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVB06F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVB06F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVB06F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVB06F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVB06F_SEMAPHORED_FORMAT 31:31
#define NVB06F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVB06F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVB06F_NON_STALL_INTERRUPT (0x00000020)
#define NVB06F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVB06F_FB_FLUSH (0x00000024)
#define NVB06F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
// possible future MEM_OP features. MEM_OP_C/D have identical functionality
// to the previous MEM_OP_A/B methods.
@@ -84,10 +136,27 @@ extern "C" {
#define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI 7:0
#define NVB06F_SET_REFERENCE (0x00000050)
#define NVB06F_SET_REFERENCE_COUNT 31:0
#define NVB06F_WFI (0x00000078)
#define NVB06F_WFI_SCOPE 0:0
#define NVB06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVB06F_WFI_SCOPE_ALL 0x00000001
#define NVB06F_CRC_CHECK (0x0000007c)
#define NVB06F_CRC_CHECK_VALUE 31:0
#define NVB06F_YIELD (0x00000080)
#define NVB06F_YIELD_OP 1:0
#define NVB06F_YIELD_OP_NOP 0x00000000
#define NVB06F_YIELD_OP_PBDMA_TIMESLICE 0x00000001
#define NVB06F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
#define NVB06F_YIELD_OP_TSG 0x00000003
/* GPFIFO entry format */
#define NVB06F_GP_ENTRY__SIZE 8
#define NVB06F_GP_ENTRY0_FETCH 0:0
#define NVB06F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVB06F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVB06F_GP_ENTRY0_GET 31:2
#define NVB06F_GP_ENTRY0_OPERAND 31:0
#define NVB06F_GP_ENTRY1_GET_HI 7:0
@@ -98,11 +167,38 @@ extern "C" {
#define NVB06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVB06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVB06F_GP_ENTRY1_LENGTH 30:10
#define NVB06F_GP_ENTRY1_SYNC 31:31
#define NVB06F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVB06F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVB06F_GP_ENTRY1_OPCODE 7:0
#define NVB06F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVB06F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVB06F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVB06F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
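To make the GP entry layout above concrete, a hypothetical packer (not part of this header): GET and GET_HI split a 40-bit pushbuffer GPU virtual address, and LENGTH at bits 30:10 is in dwords:

/* Illustrative only: build one 8-byte GPFIFO entry for a pushbuffer segment. */
static inline void nvb06f_gp_entry(NvU32 entry[2], NvU64 pb_gpu_va,
                                   NvU32 size_bytes)
{
    /* GP_ENTRY0_GET holds address bits 31:2; bit 0 (FETCH) stays 0. */
    entry[0] = (NvU32)(pb_gpu_va & 0xfffffffcULL);
    /* GP_ENTRY1_GET_HI holds address bits 39:32; LENGTH is size in dwords. */
    entry[1] = (NvU32)(pb_gpu_va >> 32) | ((size_bytes / 4) << 10);
}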
/* dma method formats */
#define NVB06F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVB06F_DMA_METHOD_ADDRESS 11:0
#define NVB06F_DMA_SUBDEVICE_MASK 15:4
#define NVB06F_DMA_METHOD_SUBCHANNEL 15:13
#define NVB06F_DMA_TERT_OP 17:16
#define NVB06F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVB06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVB06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVB06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVB06F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVB06F_DMA_METHOD_COUNT_OLD 28:18
#define NVB06F_DMA_METHOD_COUNT 28:16
#define NVB06F_DMA_IMMD_DATA 28:16
#define NVB06F_DMA_SEC_OP 31:29
#define NVB06F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVB06F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVB06F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVB06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVB06F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVB06F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVB06F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVB06F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVB06F_DMA_INCR_ADDRESS 11:0
#define NVB06F_DMA_INCR_SUBCHANNEL 15:13
@@ -132,9 +228,33 @@ extern "C" {
#define NVB06F_DMA_IMMD_DATA 28:16
#define NVB06F_DMA_IMMD_OPCODE 31:29
#define NVB06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVB06F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVB06F_DMA_ENDSEG_OPCODE 31:29
#define NVB06F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVB06F_DMA_ADDRESS 12:2
#define NVB06F_DMA_SUBCH 15:13
#define NVB06F_DMA_OPCODE3 17:16
#define NVB06F_DMA_OPCODE3_NONE (0x00000000)
#define NVB06F_DMA_COUNT 28:18
#define NVB06F_DMA_OPCODE 31:29
#define NVB06F_DMA_OPCODE_METHOD (0x00000000)
#define NVB06F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVB06F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clB06F_h_ */
#endif /* _clb06f_h_ */

View File

@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {
#define MAXWELL_DMA_COPY_A (0x0000B0B5)
#define NVB0B5_NOP (0x00000100)
#define NVB0B5_NOP_PARAMETER 31:0
#define NVB0B5_PM_TRIGGER (0x00000140)
#define NVB0B5_PM_TRIGGER_V 31:0
#define NVB0B5_SET_SEMAPHORE_A (0x00000240)
#define NVB0B5_SET_SEMAPHORE_A_UPPER 7:0
#define NVB0B5_SET_SEMAPHORE_B (0x00000244)
@@ -183,9 +187,75 @@ extern "C" {
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVB0B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_QUARTER_GOB (0x0000000E)
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4 (0x00000000)
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVB0B5_SET_DST_WIDTH (0x00000710)
#define NVB0B5_SET_DST_WIDTH_V 31:0
#define NVB0B5_SET_DST_HEIGHT (0x00000714)
#define NVB0B5_SET_DST_HEIGHT_V 31:0
#define NVB0B5_SET_DST_DEPTH (0x00000718)
#define NVB0B5_SET_DST_DEPTH_V 31:0
#define NVB0B5_SET_DST_LAYER (0x0000071C)
#define NVB0B5_SET_DST_LAYER_V 31:0
#define NVB0B5_SET_DST_ORIGIN (0x00000720)
#define NVB0B5_SET_DST_ORIGIN_X 15:0
#define NVB0B5_SET_DST_ORIGIN_Y 31:16
#define NVB0B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_QUARTER_GOB (0x0000000E)
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4 (0x00000000)
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVB0B5_SET_SRC_WIDTH (0x0000072C)
#define NVB0B5_SET_SRC_WIDTH_V 31:0
#define NVB0B5_SET_SRC_HEIGHT (0x00000730)
#define NVB0B5_SET_SRC_HEIGHT_V 31:0
#define NVB0B5_SET_SRC_DEPTH (0x00000734)
#define NVB0B5_SET_SRC_DEPTH_V 31:0
#define NVB0B5_SET_SRC_LAYER (0x00000738)
#define NVB0B5_SET_SRC_LAYER_V 31:0
#define NVB0B5_SET_SRC_ORIGIN (0x0000073C)
#define NVB0B5_SET_SRC_ORIGIN_X 15:0
#define NVB0B5_SET_SRC_ORIGIN_Y 31:16
#define NVB0B5_PM_TRIGGER_END (0x00001114)
#define NVB0B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clb0b5_h

View File

@@ -1,25 +1,25 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc06f_h_
#define _clc06f_h_
@@ -30,10 +30,47 @@ extern "C" {
#include "nvtypes.h"
/* class PASCAL_CHANNEL_GPFIFO */
/*
* Documentation for PASCAL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define PASCAL_CHANNEL_GPFIFO_A (0x0000C06F)
/* class PASCAL_CHANNEL_GPFIFO_A */
#define NVC06F_TYPEDEF PASCAL_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc06fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c];
} Nvc06fControl, PascalAControlGPFifo;
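/*
 * Illustrative sketch, not part of the original header: how a user of this
 * class might drive the GP FIFO ring through the control structure above.
 * The names `ctrl` (a mapping of the channel's USERD control page) and
 * `gp_count` (ring size in entries) are assumptions for this example only.
 */
static inline NvBool gpfifo_has_space(volatile Nvc06fControl *ctrl, NvU32 gp_count, NvU32 gp_put)
{
    /* The ring is full when advancing Put would make it collide with Get. */
    return ((gp_put + 1) % gp_count) != ctrl->GPGet;
}
static inline void gpfifo_submit(volatile Nvc06fControl *ctrl, NvU32 new_gp_put)
{
    /* Host fetches entries up to (but excluding) GPPut; a real driver would
     * issue a write barrier first so the GP entry itself is visible in
     * memory before this write lands. */
    ctrl->GPPut = new_gp_put;
}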
/* fields and values */
#define NVC06F_NUMBER_OF_SUBCHANNELS (8)
#define NVC06F_SET_OBJECT (0x00000000)
#define NVC06F_SET_OBJECT_NVCLASS 15:0
#define NVC06F_SET_OBJECT_ENGINE 20:16
#define NVC06F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC06F_ILLEGAL (0x00000004)
#define NVC06F_ILLEGAL_HANDLE 31:0
#define NVC06F_NOP (0x00000008)
#define NVC06F_NOP_HANDLE 31:0
#define NVC06F_SEMAPHOREA (0x00000010)
@@ -47,54 +84,33 @@ extern "C" {
#define NVC06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC06F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC06F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
/* GPFIFO entry format */
#define NVC06F_GP_ENTRY__SIZE 8
#define NVC06F_GP_ENTRY0_GET 31:2
#define NVC06F_GP_ENTRY0_OPERAND 31:0
#define NVC06F_GP_ENTRY1_GET_HI 7:0
#define NVC06F_GP_ENTRY1_PRIV 8:8
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVC06F_GP_ENTRY1_LEVEL 9:9
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC06F_GP_ENTRY1_LENGTH 30:10
/* dma incrementing method format */
#define NVC06F_DMA_INCR_ADDRESS 11:0
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
#define NVC06F_DMA_INCR_COUNT 28:16
#define NVC06F_DMA_INCR_OPCODE 31:29
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC06F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_NONINCR_COUNT 28:16
#define NVC06F_DMA_NONINCR_OPCODE 31:29
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC06F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_ONEINCR_COUNT 28:16
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC06F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC06F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC06F_DMA_IMMD_ADDRESS 11:0
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC06F_DMA_IMMD_DATA 28:16
#define NVC06F_DMA_IMMD_OPCODE 31:29
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
#define NVC06F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC06F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC06F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC06F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC06F_SEMAPHORED_REDUCTION 30:27
#define NVC06F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC06F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC06F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC06F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC06F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC06F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC06F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC06F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC06F_SEMAPHORED_FORMAT 31:31
#define NVC06F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC06F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
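/*
 * Illustrative sketch, not part of the original header: the four data dwords
 * of a 32-bit semaphore release, written as one incrementing method run
 * starting at NVC06F_SEMAPHOREA. The SEMAPHOREB/SEMAPHOREC methods (address
 * low, payload) sit between A and D in the full header but are elided from
 * this excerpt, so their meaning here is an assumption based on that layout;
 * the OPERATION field position (low bits of SEMAPHORED) is likewise assumed.
 */
static inline void semaphore_release_data(NvU64 sem_gpu_va, NvU32 payload, NvU32 data[4])
{
    data[0] = (NvU32)(sem_gpu_va >> 32);            /* SEMAPHOREA: address upper  */
    data[1] = (NvU32)sem_gpu_va;                    /* SEMAPHOREB: address lower  */
    data[2] = payload;                              /* SEMAPHOREC: payload        */
    data[3] = NVC06F_SEMAPHORED_OPERATION_RELEASE   /* release...                 */
            | (NVC06F_SEMAPHORED_RELEASE_SIZE_4BYTE << 24); /* ...a 4-byte payload */
}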
#define NVC06F_NON_STALL_INTERRUPT (0x00000020)
#define NVC06F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC06F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC06F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
@@ -153,19 +169,142 @@ extern "C" {
#define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC06F_MEM_OP_D_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
// This B alias is confusing, but it was missed as part of the update and is
// left here for compatibility.
#define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC06F_SET_REFERENCE (0x00000050)
#define NVC06F_SET_REFERENCE_COUNT 31:0
// Syncpoint methods are only available on Tegra parts. Attempting to use
// them on discrete GPUs will result in Host raising NV_PPBDMA_INTR_0_METHOD.
#define NVC06F_SYNCPOINTA (0x00000070)
#define NVC06F_SYNCPOINTA_PAYLOAD 31:0
#define NVC06F_SYNCPOINTB (0x00000074)
#define NVC06F_SYNCPOINTB_OPERATION 0:0
#define NVC06F_SYNCPOINTB_OPERATION_WAIT 0x00000000
#define NVC06F_SYNCPOINTB_OPERATION_INCR 0x00000001
#define NVC06F_SYNCPOINTB_WAIT_SWITCH 4:4
#define NVC06F_SYNCPOINTB_WAIT_SWITCH_DIS 0x00000000
#define NVC06F_SYNCPOINTB_WAIT_SWITCH_EN 0x00000001
#define NVC06F_SYNCPOINTB_SYNCPT_INDEX 19:8
#define NVC06F_WFI (0x00000078)
#define NVC06F_WFI_SCOPE 0:0
#define NVC06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC06F_WFI_SCOPE_ALL 0x00000001
#define NVC06F_CRC_CHECK (0x0000007c)
#define NVC06F_CRC_CHECK_VALUE 31:0
#define NVC06F_YIELD (0x00000080)
#define NVC06F_YIELD_OP 1:0
#define NVC06F_YIELD_OP_NOP 0x00000000
#define NVC06F_YIELD_OP_PBDMA_TIMESLICE 0x00000001
#define NVC06F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
#define NVC06F_YIELD_OP_TSG 0x00000003
/* GPFIFO entry format */
#define NVC06F_GP_ENTRY__SIZE 8
#define NVC06F_GP_ENTRY0_FETCH 0:0
#define NVC06F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC06F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC06F_GP_ENTRY0_GET 31:2
#define NVC06F_GP_ENTRY0_OPERAND 31:0
#define NVC06F_GP_ENTRY1_GET_HI 7:0
#define NVC06F_GP_ENTRY1_PRIV 8:8
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVC06F_GP_ENTRY1_LEVEL 9:9
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC06F_GP_ENTRY1_LENGTH 30:10
#define NVC06F_GP_ENTRY1_SYNC 31:31
#define NVC06F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC06F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC06F_GP_ENTRY1_OPCODE 7:0
#define NVC06F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC06F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC06F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC06F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
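/*
 * Illustrative sketch, not part of the original header: packing a GPFIFO
 * entry pair from a pushbuffer GPU VA and a length in bytes. The HI:LO
 * bit-range defines above are written so that (1 ? 31:2) evaluates to the
 * high bit and (0 ? 31:2) to the low bit; the three helper macros below
 * exploit that ternary trick and are local assumptions, not NVIDIA macros.
 */
#define FIELD_LO_BIT(r)  (0 ? r)
#define FIELD_HI_BIT(r)  (1 ? r)
#define FIELD_VAL(r, v)  (((NvU32)(v) << FIELD_LO_BIT(r)) & \
                          (0xFFFFFFFFu >> (31 - FIELD_HI_BIT(r))) & \
                          (0xFFFFFFFFu << FIELD_LO_BIT(r)))
static inline void gp_entry_pack(NvU32 entry[2], NvU64 pushbuffer_va, NvU32 length_bytes)
{
    /* Entry 0 carries pushbuffer VA bits 31:2 in the GET field. */
    entry[0] = FIELD_VAL(NVC06F_GP_ENTRY0_GET, pushbuffer_va >> 2);
    /* Entry 1 carries VA bits 39:32 in GET_HI and the length in dwords. */
    entry[1] = FIELD_VAL(NVC06F_GP_ENTRY1_GET_HI, pushbuffer_va >> 32) |
               FIELD_VAL(NVC06F_GP_ENTRY1_LENGTH, length_bytes / 4);
}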
/* dma method formats */
#define NVC06F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC06F_DMA_METHOD_ADDRESS 11:0
#define NVC06F_DMA_SUBDEVICE_MASK 15:4
#define NVC06F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC06F_DMA_TERT_OP 17:16
#define NVC06F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC06F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC06F_DMA_METHOD_COUNT_OLD 28:18
#define NVC06F_DMA_METHOD_COUNT 28:16
#define NVC06F_DMA_IMMD_DATA 28:16
#define NVC06F_DMA_SEC_OP 31:29
#define NVC06F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC06F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC06F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC06F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC06F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC06F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC06F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC06F_DMA_INCR_ADDRESS 11:0
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
#define NVC06F_DMA_INCR_COUNT 28:16
#define NVC06F_DMA_INCR_OPCODE 31:29
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC06F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_NONINCR_COUNT 28:16
#define NVC06F_DMA_NONINCR_OPCODE 31:29
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC06F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_ONEINCR_COUNT 28:16
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC06F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC06F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC06F_DMA_IMMD_ADDRESS 11:0
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC06F_DMA_IMMD_DATA 28:16
#define NVC06F_DMA_IMMD_OPCODE 31:29
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC06F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC06F_DMA_ENDSEG_OPCODE 31:29
#define NVC06F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC06F_DMA_ADDRESS 12:2
#define NVC06F_DMA_SUBCH 15:13
#define NVC06F_DMA_OPCODE3 17:16
#define NVC06F_DMA_OPCODE3_NONE (0x00000000)
#define NVC06F_DMA_COUNT 28:18
#define NVC06F_DMA_OPCODE 31:29
#define NVC06F_DMA_OPCODE_METHOD (0x00000000)
#define NVC06F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC06F_DMA_DATA 31:0
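/*
 * Illustrative sketch, not part of the original header: composing the header
 * dword of an incrementing method run, reusing the FIELD_VAL helper from the
 * GPFIFO-entry sketch above. The ADDRESS field is in dwords, so the byte
 * offset of the first method is shifted down by 2.
 */
static inline NvU32 dma_incr_header(NvU32 subchannel, NvU32 method_byte_offset, NvU32 count)
{
    return FIELD_VAL(NVC06F_DMA_INCR_OPCODE,     NVC06F_DMA_INCR_OPCODE_VALUE) |
           FIELD_VAL(NVC06F_DMA_INCR_COUNT,      count) |
           FIELD_VAL(NVC06F_DMA_INCR_SUBCHANNEL, subchannel) |
           FIELD_VAL(NVC06F_DMA_INCR_ADDRESS,    method_byte_offset >> 2);
}
/* For example, binding a class to subchannel 0 takes one header plus one data
 * dword: pb[0] = dma_incr_header(0, NVC06F_SET_OBJECT, 1); pb[1] = class_id; */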
#ifdef __cplusplus
}; /* extern "C" */
#endif


@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {
#define PASCAL_DMA_COPY_A (0x0000C0B5)
#define NVC0B5_NOP (0x00000100)
#define NVC0B5_NOP_PARAMETER 31:0
#define NVC0B5_PM_TRIGGER (0x00000140)
#define NVC0B5_PM_TRIGGER_V 31:0
#define NVC0B5_SET_SEMAPHORE_A (0x00000240)
#define NVC0B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC0B5_SET_SEMAPHORE_B (0x00000244)
@@ -115,6 +119,10 @@ extern "C" {
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC0B5_LAUNCH_DMA_RESERVED 31:28
#define NVC0B5_OFFSET_IN_UPPER (0x00000400)
#define NVC0B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC0B5_OFFSET_IN_LOWER (0x00000404)
@@ -183,6 +191,68 @@ extern "C" {
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC0B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC0B5_SET_DST_WIDTH (0x00000710)
#define NVC0B5_SET_DST_WIDTH_V 31:0
#define NVC0B5_SET_DST_HEIGHT (0x00000714)
#define NVC0B5_SET_DST_HEIGHT_V 31:0
#define NVC0B5_SET_DST_DEPTH (0x00000718)
#define NVC0B5_SET_DST_DEPTH_V 31:0
#define NVC0B5_SET_DST_LAYER (0x0000071C)
#define NVC0B5_SET_DST_LAYER_V 31:0
#define NVC0B5_SET_DST_ORIGIN (0x00000720)
#define NVC0B5_SET_DST_ORIGIN_X 15:0
#define NVC0B5_SET_DST_ORIGIN_Y 31:16
#define NVC0B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC0B5_SET_SRC_WIDTH (0x0000072C)
#define NVC0B5_SET_SRC_WIDTH_V 31:0
#define NVC0B5_SET_SRC_HEIGHT (0x00000730)
#define NVC0B5_SET_SRC_HEIGHT_V 31:0
#define NVC0B5_SET_SRC_DEPTH (0x00000734)
#define NVC0B5_SET_SRC_DEPTH_V 31:0
#define NVC0B5_SET_SRC_LAYER (0x00000738)
#define NVC0B5_SET_SRC_LAYER_V 31:0
#define NVC0B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC0B5_SET_SRC_ORIGIN_X 15:0
#define NVC0B5_SET_SRC_ORIGIN_Y 31:16
#define NVC0B5_PM_TRIGGER_END (0x00001114)
#define NVC0B5_PM_TRIGGER_END_V 31:0
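/*
 * Illustrative sketch, not part of the original header: the BLOCK_SIZE
 * height/depth fields encode log2 of the GOB count (ONE_GOB = 0 through
 * THIRTYTWO_GOBS = 5), and the shift amounts below match the 3:0 / 7:4 /
 * 11:8 / 15:12 field positions above. A one-GOB-wide, sixteen-GOB-high,
 * two-GOB-deep destination block with 8-row GOBs would be:
 */
#define NVC0B5_EXAMPLE_DST_BLOCK_SIZE \
    ((NVC0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                 << 0)  | \
     (NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS           << 4)  | \
     (NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                << 8)  | \
     (NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 << 12))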
#ifdef __cplusplus
}; /* extern "C" */


@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {
#define PASCAL_DMA_COPY_B (0x0000C1B5)
#define NVC1B5_NOP (0x00000100)
#define NVC1B5_NOP_PARAMETER 31:0
#define NVC1B5_PM_TRIGGER (0x00000140)
#define NVC1B5_PM_TRIGGER_V 31:0
#define NVC1B5_SET_SEMAPHORE_A (0x00000240)
#define NVC1B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC1B5_SET_SEMAPHORE_B (0x00000244)
@@ -115,6 +119,14 @@ extern "C" {
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC1B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC1B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC1B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC1B5_OFFSET_IN_UPPER (0x00000400)
#define NVC1B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC1B5_OFFSET_IN_LOWER (0x00000404)
@@ -183,6 +195,76 @@ extern "C" {
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC1B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC1B5_SET_DST_WIDTH (0x00000710)
#define NVC1B5_SET_DST_WIDTH_V 31:0
#define NVC1B5_SET_DST_HEIGHT (0x00000714)
#define NVC1B5_SET_DST_HEIGHT_V 31:0
#define NVC1B5_SET_DST_DEPTH (0x00000718)
#define NVC1B5_SET_DST_DEPTH_V 31:0
#define NVC1B5_SET_DST_LAYER (0x0000071C)
#define NVC1B5_SET_DST_LAYER_V 31:0
#define NVC1B5_SET_DST_ORIGIN (0x00000720)
#define NVC1B5_SET_DST_ORIGIN_X 15:0
#define NVC1B5_SET_DST_ORIGIN_Y 31:16
#define NVC1B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC1B5_SET_SRC_WIDTH (0x0000072C)
#define NVC1B5_SET_SRC_WIDTH_V 31:0
#define NVC1B5_SET_SRC_HEIGHT (0x00000730)
#define NVC1B5_SET_SRC_HEIGHT_V 31:0
#define NVC1B5_SET_SRC_DEPTH (0x00000734)
#define NVC1B5_SET_SRC_DEPTH_V 31:0
#define NVC1B5_SET_SRC_LAYER (0x00000738)
#define NVC1B5_SET_SRC_LAYER_V 31:0
#define NVC1B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC1B5_SET_SRC_ORIGIN_X 15:0
#define NVC1B5_SET_SRC_ORIGIN_Y 31:16
#define NVC1B5_SRC_ORIGIN_X (0x00000744)
#define NVC1B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC1B5_SRC_ORIGIN_Y (0x00000748)
#define NVC1B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC1B5_DST_ORIGIN_X (0x0000074C)
#define NVC1B5_DST_ORIGIN_X_VALUE 31:0
#define NVC1B5_DST_ORIGIN_Y (0x00000750)
#define NVC1B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC1B5_PM_TRIGGER_END (0x00001114)
#define NVC1B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to


@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {
#define VOLTA_DMA_COPY_A (0x0000C3B5)
#define NVC3B5_NOP (0x00000100)
#define NVC3B5_NOP_PARAMETER 31:0
#define NVC3B5_PM_TRIGGER (0x00000140)
#define NVC3B5_PM_TRIGGER_V 31:0
#define NVC3B5_SET_SEMAPHORE_A (0x00000240)
#define NVC3B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC3B5_SET_SEMAPHORE_B (0x00000244)
@@ -69,6 +73,9 @@ extern "C" {
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
@@ -123,8 +130,6 @@ extern "C" {
#define NVC3B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC3B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC3B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC3B5_OFFSET_IN_UPPER (0x00000400)
@@ -195,6 +200,76 @@ extern "C" {
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC3B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC3B5_SET_DST_WIDTH (0x00000710)
#define NVC3B5_SET_DST_WIDTH_V 31:0
#define NVC3B5_SET_DST_HEIGHT (0x00000714)
#define NVC3B5_SET_DST_HEIGHT_V 31:0
#define NVC3B5_SET_DST_DEPTH (0x00000718)
#define NVC3B5_SET_DST_DEPTH_V 31:0
#define NVC3B5_SET_DST_LAYER (0x0000071C)
#define NVC3B5_SET_DST_LAYER_V 31:0
#define NVC3B5_SET_DST_ORIGIN (0x00000720)
#define NVC3B5_SET_DST_ORIGIN_X 15:0
#define NVC3B5_SET_DST_ORIGIN_Y 31:16
#define NVC3B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC3B5_SET_SRC_WIDTH (0x0000072C)
#define NVC3B5_SET_SRC_WIDTH_V 31:0
#define NVC3B5_SET_SRC_HEIGHT (0x00000730)
#define NVC3B5_SET_SRC_HEIGHT_V 31:0
#define NVC3B5_SET_SRC_DEPTH (0x00000734)
#define NVC3B5_SET_SRC_DEPTH_V 31:0
#define NVC3B5_SET_SRC_LAYER (0x00000738)
#define NVC3B5_SET_SRC_LAYER_V 31:0
#define NVC3B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC3B5_SET_SRC_ORIGIN_X 15:0
#define NVC3B5_SET_SRC_ORIGIN_Y 31:16
#define NVC3B5_SRC_ORIGIN_X (0x00000744)
#define NVC3B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC3B5_SRC_ORIGIN_Y (0x00000748)
#define NVC3B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC3B5_DST_ORIGIN_X (0x0000074C)
#define NVC3B5_DST_ORIGIN_X_VALUE 31:0
#define NVC3B5_DST_ORIGIN_Y (0x00000750)
#define NVC3B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC3B5_PM_TRIGGER_END (0x00001114)
#define NVC3B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */


@@ -0,0 +1,97 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clcba2_h_
#define _clcba2_h_
#ifdef __cplusplus
extern "C" {
#endif
#define HOPPER_SEC2_WORK_LAUNCH_A (0x0000CBA2)
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI (0x00000400)
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA 24:0
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO (0x00000404)
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO_DATA 31:4
#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI (0x00000408)
#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI_DATA 24:0
#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO (0x0000040c)
#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO_DATA 31:4
#define NVCBA2_DECRYPT_COPY_SIZE (0x00000410)
#define NVCBA2_DECRYPT_COPY_SIZE_DATA 31:2
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI (0x00000414)
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI_DATA 24:0
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO (0x00000418)
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO_DATA 31:4
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI (0x0000041C)
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI_DATA 24:0
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO (0x00000420)
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO_DATA 31:4
#define NVCBA2_SEMAPHORE_A (0x00000440)
#define NVCBA2_SEMAPHORE_A_UPPER 24:0
#define NVCBA2_SEMAPHORE_B (0x00000444)
#define NVCBA2_SEMAPHORE_B_LOWER 31:2
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER (0x00000448)
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER_DATA 31:0
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000044C)
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER_DATA 31:0
#define NVCBA2_SEMAPHORE_D (0x00000450)
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR 0:0
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_DISABLE (0x00000000)
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_ENABLE (0x00000001)
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE 1:1
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_32_BIT (0x00000000)
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_64_BIT (0x00000001)
#define NVCBA2_SEMAPHORE_D_TIMESTAMP 2:2
#define NVCBA2_SEMAPHORE_D_TIMESTAMP_DISABLE (0x00000000)
#define NVCBA2_SEMAPHORE_D_TIMESTAMP_ENABLE (0x00000001)
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE 3:3
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_FALSE (0x00000000)
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_TRUE (0x00000001)
#define NVCBA2_EXECUTE (0x00000470)
#define NVCBA2_EXECUTE_NOTIFY 0:0
#define NVCBA2_EXECUTE_NOTIFY_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_NOTIFY_ENABLE (0x00000001)
#define NVCBA2_EXECUTE_NOTIFY_ON 1:1
#define NVCBA2_EXECUTE_NOTIFY_ON_END (0x00000000)
#define NVCBA2_EXECUTE_NOTIFY_ON_BEGIN (0x00000001)
#define NVCBA2_EXECUTE_FLUSH_DISABLE 2:2
#define NVCBA2_EXECUTE_FLUSH_DISABLE_FALSE (0x00000000)
#define NVCBA2_EXECUTE_FLUSH_DISABLE_TRUE (0x00000001)
#define NVCBA2_EXECUTE_NOTIFY_INTR 3:3
#define NVCBA2_EXECUTE_NOTIFY_INTR_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_NOTIFY_INTR_ENABLE (0x00000001)
#define NVCBA2_EXECUTE_PAYLOAD_SIZE 4:4
#define NVCBA2_EXECUTE_PAYLOAD_SIZE_32_BIT (0x00000000)
#define NVCBA2_EXECUTE_PAYLOAD_SIZE_64_BIT (0x00000001)
#define NVCBA2_EXECUTE_TIMESTAMP 5:5
#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE (0x00000001)
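/*
 * Illustrative sketch, not part of the original header: the DECRYPT_COPY
 * address methods split a 16-byte-aligned GPU VA across two dwords, with
 * bits 31:4 carried in the LO method and bits 56:32 in the HI method's
 * 25-bit (24:0) DATA field.
 */
static inline void nvcba2_addr_pack(NvU64 addr, NvU32 *hi, NvU32 *lo)
{
    *lo = (NvU32)(addr & 0xFFFFFFF0u);        /* ..._ADDR_LO_DATA, bits 31:4 */
    *hi = (NvU32)(addr >> 32) & 0x01FFFFFFu;  /* ..._ADDR_HI_DATA, bits 24:0 */
}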
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clcba2_h


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Copyright (c) 2013-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -81,7 +81,7 @@
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)
// This exists in order to have a function to place a breakpoint on:
void on_nvq_assert(void)
static void on_nvq_assert(void)
{
(void)NULL;
}
@@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
int result, node;
nv_kthread_q_t local_q;
// If the API does not support CPU affinity, check whether the correct
// error code is returned.
// Non-affinitized queue allocation has been verified by previous test
// so just ensure that the affinitized version also works.
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
TEST_CHECK_RET(result == -ENOTSUPP);
return 0;
}
for_each_online_node(node) {
unsigned i;
const unsigned max_i = 100;


@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
//
// This function is never invoked when there is no NUMA preference (preferred
// node is NUMA_NO_NODE).
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
nv_kthread_q_t *q,
int preferred_node,
@@ -177,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
{
unsigned i, j;
const static unsigned attempts = 3;
static const unsigned attempts = 3;
struct task_struct *thread[3];
for (i = 0;; i++) {
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
return thread[i];
}
#endif
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
{
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
q->q_kthread = kthread_create(_main_loop, q, q_name);
}
else {
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
#else
return -ENOTSUPP;
#endif
}
if (IS_ERR(q->q_kthread)) {
@@ -307,7 +301,7 @@ static void _q_flush_function(void *args)
static void _raw_q_flush(nv_kthread_q_t *q)
{
nv_kthread_q_item_t q_item;
DECLARE_COMPLETION(completion);
DECLARE_COMPLETION_ONSTACK(completion);
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
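// Note: DECLARE_COMPLETION_ONSTACK is the kernel idiom for completions with
// automatic storage duration; unlike DECLARE_COMPLETION it initializes the
// completion's lockdep class for on-stack use, which matters here because
// _q_flush_function signals the completion from the queue's worker thread.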


@@ -1,12 +1,11 @@
NVIDIA_UVM_SOURCES ?=
NVIDIA_UVM_SOURCES_CXX ?=
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
@@ -58,6 +57,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
@@ -72,6 +72,12 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
@@ -94,7 +100,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c


@@ -36,7 +36,7 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
#
ifeq ($(UVM_BUILD_TYPE),debug)
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
else
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
@@ -67,17 +67,11 @@ endif
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
@@ -87,26 +81,41 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += page_pgmap
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___iowrite64_lo_hi
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_make_device_exclusive


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -28,6 +28,7 @@
#include "uvm_lock.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include "uvm_va_range.h"
#include "uvm_va_block.h"
#include "uvm_tools.h"
@@ -35,93 +36,200 @@
#include "uvm_linux_ioctl.h"
#include "uvm_hmm.h"
#include "uvm_mem.h"
#include "uvm_kvmalloc.h"
#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
static dev_t g_uvm_base_dev;
static struct cdev g_uvm_cdev;
static const struct file_operations uvm_fops;
// List of fault service contexts for CPU faults
static LIST_HEAD(g_cpu_service_block_context_list);
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
NV_STATUS uvm_service_block_context_init(void)
bool uvm_file_is_nvidia_uvm(struct file *filp)
{
unsigned num_preallocated_contexts = 4;
return (filp != NULL) && (filp->f_op == &uvm_fops);
}
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
{
unsigned long uptr;
uvm_fd_type_t type;
void *ptr;
// Pre-allocate some fault service contexts for the CPU and add them to the global list
while (num_preallocated_contexts-- > 0) {
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
if (!service_context)
return NV_ERR_NO_MEMORY;
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
switch (type) {
case UVM_FD_UNINITIALIZED:
case UVM_FD_INITIALIZING:
UVM_ASSERT(!ptr);
break;
case UVM_FD_VA_SPACE:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
break;
case UVM_FD_MM:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
break;
default:
UVM_ASSERT(0);
}
if (ptr_val)
*ptr_val = ptr;
return type;
}
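/*
 * Illustrative sketch, not part of the original source: the inverse of the
 * decode above. Because both uvm_va_space_t and struct file are aligned to
 * at least (1 << UVM_FD_TYPE_BITS) bytes (the BUILD_BUG_ONs in uvm_fd_type
 * enforce this), the fd type tag can ride in the pointer's low bits.
 */
static inline long uvm_fd_encode(void *ptr, uvm_fd_type_t type)
{
    UVM_ASSERT(((unsigned long)ptr & UVM_FD_TYPE_MASK) == 0);
    return (long)((unsigned long)ptr | (unsigned long)type);
}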
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
void *ptr;
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
if (uvm_fd_type(filp, &ptr) == type)
return ptr;
else
return NULL;
}
static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space;
uvm_va_space_mm_t *va_space_mm;
struct file *uvm_file;
uvm_fd_type_t old_fd_type;
struct mm_struct *mm;
NV_STATUS status;
uvm_file = fget(params->uvmFd);
if (!uvm_file_is_nvidia_uvm(uvm_file)) {
status = NV_ERR_INVALID_ARGUMENT;
goto err;
}
if (uvm_fd_type(uvm_file, (void **)&va_space) != UVM_FD_VA_SPACE) {
status = NV_ERR_INVALID_ARGUMENT;
goto err;
}
// Tell userspace the MM FD is not required and it may be released
// with no loss of functionality.
if (!uvm_va_space_mm_enabled(va_space)) {
status = NV_WARN_NOTHING_TO_DO;
goto err;
}
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type != UVM_FD_UNINITIALIZED) {
status = NV_ERR_IN_USE;
goto err;
}
va_space_mm = &va_space->va_space_mm;
uvm_spin_lock(&va_space_mm->lock);
switch (va_space->va_space_mm.state) {
// We only allow the va_space_mm to be initialised once. If
// userspace passed the UVM FD to another process it is up to
// userspace to ensure it also passes the UVM MM FD that
// initialised the va_space_mm or arranges some other way to keep
// a reference on the FD.
case UVM_VA_SPACE_MM_STATE_ALIVE:
status = NV_ERR_INVALID_STATE;
goto err_release_unlock;
break;
// Once userspace has released the va_space_mm the GPU is
// effectively dead and no new work can be started. We don't
// support re-initializing once userspace has closed the FD.
case UVM_VA_SPACE_MM_STATE_RELEASED:
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
goto err_release_unlock;
break;
// Keep the warnings at bay
case UVM_VA_SPACE_MM_STATE_UNINITIALIZED:
mm = va_space->va_space_mm.mm;
if (!mm || !mmget_not_zero(mm)) {
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
goto err_release_unlock;
}
va_space_mm->state = UVM_VA_SPACE_MM_STATE_ALIVE;
break;
default:
UVM_ASSERT(0);
break;
}
uvm_spin_unlock(&va_space_mm->lock);
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
return NV_OK;
err_release_unlock:
uvm_spin_unlock(&va_space_mm->lock);
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
err:
if (uvm_file)
fput(uvm_file);
return status;
}
void uvm_service_block_context_exit(void)
{
uvm_service_block_context_t *service_context, *service_context_tmp;
// Free fault service contexts for the CPU and clear the global list
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
cpu_fault.service_context_list) {
uvm_kvfree(service_context);
}
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
}
// Get a fault service context from the global list or allocate a new one if there are no
// available entries
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
{
uvm_service_block_context_t *service_context;
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
cpu_fault.service_context_list);
if (service_context)
list_del(&service_context->cpu_fault.service_context_list);
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
if (!service_context)
service_context = uvm_kvmalloc(sizeof(*service_context));
return service_context;
}
// Put a fault service context in the global list
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
{
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
}
// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
// lock will need to be taken.
static int uvm_open(struct inode *inode, struct file *filp)
{
struct address_space *mapping;
NV_STATUS status = uvm_global_get_status();
if (status == NV_OK) {
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
if (status != NV_OK)
return -nv_status_to_errno(status);
status = uvm_va_space_create(inode, filp);
mapping = uvm_kvmalloc(sizeof(*mapping));
if (!mapping)
return -ENOMEM;
uvm_up_read(&g_uvm_global.pm.lock);
}
// By default all struct files on the same inode share the same
// address_space structure (the inode's) across all processes. This means
// unmap_mapping_range would unmap virtual mappings across all processes on
// that inode.
//
// Since the UVM driver uses the mapping offset as the VA of the file's
// process, we need to isolate the mappings to each process.
address_space_init_once(mapping);
mapping->host = inode;
return -nv_status_to_errno(status);
// Some paths in the kernel, for example force_page_cache_readahead which
// can be invoked from user-space via madvise MADV_WILLNEED and fadvise
// POSIX_FADV_WILLNEED, check the function pointers within
// file->f_mapping->a_ops for validity. However, those paths assume that a_ops
// itself is always valid. Handle that by using the inode's a_ops pointer,
// which is what f_mapping->a_ops would point to anyway if we weren't re-
// assigning f_mapping.
mapping->a_ops = inode->i_mapping->a_ops;
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
#endif
filp->private_data = NULL;
filp->f_mapping = mapping;
return NV_OK;
}
static int uvm_open_entry(struct inode *inode, struct file *filp)
@@ -145,11 +253,44 @@ static void uvm_release_deferred(void *data)
uvm_up_read(&g_uvm_global.pm.lock);
}
static void uvm_mm_release(struct file *filp, struct file *uvm_file)
{
uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
struct mm_struct *mm = va_space_mm->mm;
if (uvm_va_space_mm_enabled(va_space)) {
uvm_va_space_mm_unregister(va_space);
if (uvm_va_space_mm_enabled(va_space))
uvm_mmput(mm);
va_space_mm->mm = NULL;
fput(uvm_file);
}
}
static int uvm_release(struct inode *inode, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
void *ptr;
uvm_va_space_t *va_space;
uvm_fd_type_t fd_type;
int ret;
fd_type = uvm_fd_type(filp, &ptr);
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
if (fd_type == UVM_FD_UNINITIALIZED) {
uvm_kvfree(filp->f_mapping);
return 0;
}
else if (fd_type == UVM_FD_MM) {
uvm_kvfree(filp->f_mapping);
uvm_mm_release(filp, (struct file *)ptr);
return 0;
}
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
va_space = (uvm_va_space_t *)ptr;
filp->private_data = NULL;
filp->f_mapping = NULL;
@@ -167,7 +308,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in
// which case the references are rendered stale.
address_space_init_once(&va_space->mapping);
address_space_init_once(va_space->mapping);
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
@@ -430,14 +571,11 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
static void uvm_vm_close_managed(struct vm_area_struct *vma)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
uvm_gpu_t *gpu;
bool make_zombie = false;
if (current->mm != NULL)
uvm_record_lock_mmap_lock_write(current->mm);
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
// current->mm will be NULL on process teardown, in which case we have
// special handling.
if (current->mm == NULL) {
@@ -467,14 +605,6 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
uvm_destroy_vma_managed(vma, make_zombie);
// Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
// that can be attributed to new VA ranges reallocated at the same address
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
UVM_ASSERT(gpu_va_space);
gpu_va_space->needs_fault_buffer_flush = true;
}
uvm_va_space_up_write(va_space);
if (current->mm != NULL)
@@ -489,139 +619,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
uvm_va_block_t *va_block;
NvU64 fault_addr = nv_page_fault_va(vmf);
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
NV_STATUS status = uvm_global_get_status();
bool tools_enabled;
bool major_fault = false;
uvm_service_block_context_t *service_context;
uvm_global_processor_mask_t gpus_to_check_for_ecc;
if (status != NV_OK)
goto convert_error;
// TODO: Bug 2583279: Lock tracking is disabled for the power management
// lock in order to suppress reporting of a lock policy violation.
// The violation consists in acquiring the power management lock multiple
// times, and it is manifested as an error during release. The
// re-acquisition of the power management locks happens upon re-entry in the
// UVM module, and it is benign in itself, but when combined with certain
// power management scenarios, it is indicative of a potential deadlock.
// Tracking will be re-enabled once the power management locking strategy is
// modified to avoid deadlocks.
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
status = NV_ERR_BUSY_RETRY;
goto convert_error;
}
service_context = uvm_service_block_context_cpu_alloc();
if (!service_context) {
status = NV_ERR_NO_MEMORY;
goto unlock;
}
service_context->cpu_fault.wakeup_time_stamp = 0;
// The mmap_lock might be held in write mode, but the mode doesn't matter
// for the purpose of lock ordering and we don't rely on it being in write
// anywhere so just record it as read mode in all cases.
uvm_record_lock_mmap_lock_read(vma->vm_mm);
do {
bool do_sleep = false;
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
NvU64 now = NV_GETTIME();
if (now < service_context->cpu_fault.wakeup_time_stamp)
do_sleep = true;
if (do_sleep)
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
// Drop the VA space lock while we sleep
uvm_va_space_up_read(va_space);
// usleep_range is preferred because msleep has a 20ms granularity
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
// timers and, by adding a range, the Linux scheduler may coalesce
// our wakeup with others, thus saving some interrupts.
if (do_sleep) {
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
usleep_range(nap_us, nap_us + nap_us / 2);
}
}
uvm_va_space_down_read(va_space);
if (do_sleep)
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
if (status != NV_OK) {
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
break;
}
// Watch out, current->mm might not be vma->vm_mm
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
// Loop until thrashing goes away.
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
if (status != NV_OK) {
UvmEventFatalReason reason;
reason = uvm_tools_status_to_fatal_fault_reason(status);
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
}
tools_enabled = va_space->tools.enabled;
if (status == NV_OK) {
uvm_va_space_global_gpus_in_mask(va_space,
&gpus_to_check_for_ecc,
&service_context->cpu_fault.gpus_to_check_for_ecc);
uvm_global_mask_retain(&gpus_to_check_for_ecc);
}
uvm_va_space_up_read(va_space);
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
if (status == NV_OK) {
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
uvm_global_mask_release(&gpus_to_check_for_ecc);
}
if (tools_enabled)
uvm_tools_flush_events();
// Major faults involve I/O in order to resolve the fault.
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
// A process can also get statistics for major and minor faults by calling readproc().
major_fault = service_context->cpu_fault.did_migrate;
uvm_service_block_context_cpu_free(service_context);
unlock:
// TODO: Bug 2583279: See the comment above the matching lock acquisition
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
convert_error:
switch (status) {
case NV_OK:
case NV_ERR_BUSY_RETRY:
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
case NV_ERR_NO_MEMORY:
return VM_FAULT_OOM;
default:
return VM_FAULT_SIGBUS;
}
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
}
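The removed comment above notes that a process can read its own major/minor fault statistics; as an illustrative user-space check, getrusage() exposes the same counters (shown here as an alternative to the readproc() mentioned in the comment):
// Illustrative: print this process's major/minor fault counters.
#include <stdio.h>
#include <sys/resource.h>
int main(void)
{
    struct rusage ru;
    if (getrusage(RUSAGE_SELF, &ru) == 0)
        printf("major faults: %ld, minor faults: %ld\n",
               ru.ru_majflt, ru.ru_minflt);
    return 0;
}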
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
{
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
@@ -681,6 +682,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
// Semaphore pool vmas do not have vma wrappers, but some functions will
// assume vm_private_data is a wrapper.
vma->vm_private_data = NULL;
#if defined(VM_WIPEONFORK)
nv_vm_flags_set(vma, VM_WIPEONFORK);
#endif
if (is_fork) {
// If we forked, leave the parent vma alone.
@@ -752,7 +756,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_t *va_space;
uvm_va_range_t *va_range;
NV_STATUS status = uvm_global_get_status();
int ret = 0;
@@ -761,8 +765,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
if (status != NV_OK)
return -nv_status_to_errno(status);
status = uvm_va_space_initialized(va_space);
if (status != NV_OK)
va_space = uvm_fd_va_space(filp);
if (!va_space)
return -EBADFD;
// When the VA space is associated with an mm, all vmas under the VA space
@@ -814,7 +818,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
// Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
// so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
// with VM_IO, but that causes other mapping issues.
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
// Make the default behavior be VM_DONTCOPY to avoid the performance impact
// of removing CPU mappings in the parent on fork()+exec(). Users can call
// madvise(MADV_DOFORK) if the child process requires access to the
// allocation.
nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY);
vma->vm_ops = &uvm_vm_ops_managed;
@@ -874,6 +882,13 @@ out:
return ret;
}
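Because managed mappings now default to VM_DONTCOPY (see the comment in uvm_mmap above), a parent that wants the child to keep access must opt back in before fork(). A minimal user-space sketch:
// Illustrative: opt a mapping back into fork() inheritance.
#include <stdio.h>
#include <sys/mman.h>
static void allow_child_access(void *base, size_t length)
{
    // MADV_DOFORK undoes the effect of MADV_DONTFORK/VM_DONTCOPY for the range.
    if (madvise(base, length, MADV_DOFORK) != 0)
        perror("madvise(MADV_DOFORK)");
}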
bool uvm_vma_is_managed(struct vm_area_struct *vma)
{
return vma->vm_ops == &uvm_vm_ops_disabled ||
vma->vm_ops == &uvm_vm_ops_managed ||
vma->vm_ops == &uvm_vm_ops_semaphore_pool;
}
static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
{
UVM_ENTRY_RET(uvm_mmap(filp, vma));
@@ -881,7 +896,56 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
{
return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
uvm_va_space_t *va_space;
NV_STATUS status;
uvm_fd_type_t old_fd_type;
// Normally we expect private_data == UVM_FD_UNINITIALIZED. However multiple
// threads may call this ioctl concurrently so we have to be careful to
// avoid initializing multiple va_spaces and/or leaking memory. To do this
// we do an atomic compare and swap. Only one thread will observe
// UVM_FD_UNINITIALIZED and that thread will allocate and setup the
// va_space.
//
// Other threads will either see UVM_FD_INITIALIZING or UVM_FD_VA_SPACE. In
// the case of UVM_FD_VA_SPACE we return success if and only if the
// initialization flags match. If another thread is still initializing the
// va_space we return NV_ERR_BUSY_RETRY.
//
// If va_space initialization fails we return the failure code and reset the
// FD state back to UVM_FD_UNINITIALIZED to allow another initialization
// attempt to be made. This is safe because other threads will have only had
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
// case.
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type == UVM_FD_UNINITIALIZED) {
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
if (status != NV_OK) {
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
return status;
}
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
}
else if (old_fd_type == UVM_FD_VA_SPACE) {
va_space = uvm_va_space_get(filp);
if (params->flags != va_space->initialization_flags)
status = NV_ERR_INVALID_ARGUMENT;
else
status = NV_OK;
}
else if (old_fd_type == UVM_FD_MM) {
status = NV_ERR_INVALID_ARGUMENT;
}
else {
UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
status = NV_ERR_BUSY_RETRY;
}
return status;
}
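The comment in uvm_api_initialize() describes a lock-free one-shot init: exactly one thread wins the compare-and-swap and performs setup, while late arrivals either retry or validate flags. A user-space analogue of the same idiom with C11 atomics (names are illustrative, not the driver's):
#include <stdatomic.h>
#include <stdbool.h>
enum { UNINITIALIZED = 0, INITIALIZING = 1, READY = 2 };
static atomic_long g_state = UNINITIALIZED;
// Returns true once setup has completed; false means "busy, retry later".
static bool init_once(void (*setup)(void))
{
    long expected = UNINITIALIZED;
    if (atomic_compare_exchange_strong(&g_state, &expected, INITIALIZING)) {
        setup();                                   // only the CAS winner runs setup
        atomic_store_explicit(&g_state, READY, memory_order_release);
        return true;
    }
    return atomic_load(&g_state) == READY;         // INITIALIZING: caller retries
}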
static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
@@ -899,6 +963,7 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return 0;
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_INITIALIZE, uvm_api_initialize);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_MM_INITIALIZE, uvm_api_mm_initialize);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS, uvm_api_pageable_mem_access);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS_ON_GPU, uvm_api_pageable_mem_access_on_gpu);
@@ -978,16 +1043,9 @@ static const struct file_operations uvm_fops =
.owner = THIS_MODULE,
};
bool uvm_file_is_nvidia_uvm(struct file *filp)
{
return (filp != NULL) && (filp->f_op == &uvm_fops);
}
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
{
long ret;
int write = 1;
int force = 0;
struct page *page;
NV_STATUS status = NV_OK;
@@ -998,7 +1056,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
// are not used because unload_state_buf may be a managed memory pointer and
// therefore a locking assertion from the CPU fault handler could be fired.
nv_mmap_read_lock(current->mm);
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
nv_mmap_read_unlock(current->mm);
if (ret < 0)
@@ -1008,7 +1066,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
uvm_mutex_lock(&g_uvm_global.global_lock);
if (g_uvm_global.unload_state.ptr) {
put_page(page);
NV_UNPIN_USER_PAGE(page);
status = NV_ERR_IN_USE;
goto error;
}
@@ -1027,7 +1085,7 @@ static void uvm_test_unload_state_exit(void)
{
if (g_uvm_global.unload_state.ptr) {
kunmap(g_uvm_global.unload_state.page);
put_page(g_uvm_global.unload_state.page);
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
}
}

View File

@@ -54,7 +54,7 @@
#ifndef _UVM_H_
#define _UVM_H_
#define UVM_API_LATEST_REVISION 7
#define UVM_API_LATEST_REVISION 8
#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@@ -211,12 +211,12 @@ NV_STATUS UvmDeinitialize(void);
// UvmReopen
//
// Reinitializes the UVM driver after checking for minimal user-mode state.
// Before calling this function, all GPUs must be unregistered with
// Before calling this function, all GPUs must be unregistered with
// UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree().
// Note that it is not required to release VA ranges that were reserved with
// UvmReserveVa().
//
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
// replaces it with a new open file with the same name.
//
// Arguments:
@@ -410,6 +410,12 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
// location will have their range group association changed to
// UVM_RANGE_GROUP_ID_NONE.
//
// If the Confidential Computing feature is enabled in the system, any VA
// ranges allocated using UvmAllocSemaphorePool and owned by this GPU will be
// unmapped from all GPUs and the CPU. UvmFree must still be called on those
// ranges to reclaim the VA. See UvmAllocSemaphorePool to determine which GPU
// is considered the owner.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to unregister.
@@ -1094,10 +1100,12 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
// Creates a new mapping in the virtual address space of the process, populates
// it at the specified preferred location, maps it on the provided list of
// processors if feasible and associates the range with the given range group.
// If preferredLocationUuid is the UUID of the CPU, the preferred location is
// set to all CPU nodes allowed by the global and thread memory policies.
//
// This API is equivalent to the following code sequence:
// UvmMemMap(base, length);
// UvmSetPreferredLocation(base, length, preferredLocationUuid);
// UvmSetPreferredLocation(base, length, preferredLocationUuid, -1);
// for (i = 0; i < accessedByCount; i++) {
// UvmSetAccessedBy(base, length, &accessedByUuids[i]);
// }
@@ -1262,6 +1270,12 @@ NV_STATUS UvmCleanUpZombieResources(void);
//
// The VA range can be unmapped and freed via a call to UvmFree.
//
// If the Confidential Computing feature is enabled in the system, at least one
// GPU must be provided in the perGpuAttribs array. The first GPU in the array
// is considered the owning GPU. If the owning GPU is unregistered via
// UvmUnregisterGpu, this allocation will no longer be usable.
// See UvmUnregisterGpu.
//
// Arguments:
// base: (INPUT)
// Base address of the virtual address range.
@@ -1298,6 +1312,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
// NV_ERR_INVALID_ARGUMENT:
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
// or caching is requested on more than one GPU.
// The Confidential Computing feature is enabled and the perGpuAttribs
// list is empty.
//
// NV_ERR_NOT_SUPPORTED:
// The current process is not the one which called UvmInitialize, and
@@ -1444,7 +1460,7 @@ NV_STATUS UvmMigrate(void *base,
NV_STATUS UvmMigrate(void *base,
NvLength length,
const NvProcessorUuid *destinationUuid,
NvU32 preferredCpuMemoryNode);
NvS32 preferredCpuMemoryNode);
#endif
//------------------------------------------------------------------------------
@@ -1537,7 +1553,7 @@ NV_STATUS UvmMigrateAsync(void *base,
NV_STATUS UvmMigrateAsync(void *base,
NvLength length,
const NvProcessorUuid *destinationUuid,
NvU32 preferredCpuMemoryNode,
NvS32 preferredCpuMemoryNode,
void *semaphoreAddress,
NvU32 semaphorePayload);
#endif
@@ -1746,17 +1762,20 @@ NV_STATUS UvmCreateExternalRange(void *base,
// GPUs. The external allocation can be unmapped from a specific GPU using
// UvmUnmapExternal or from all GPUs using UvmFree.
//
// The virtual address range specified by (base, length) must be aligned to the
// allocation's physical page size and must fall within a VA range previously
// created with UvmCreateExternalRange. A GPU VA space must have been registered
// for each GPU in the list. The offset in the physical allocation at which the
// allocation must be mapped should also be aligned to the allocation's physical
// page size. The (base, length) range must lie within the largest possible
// virtual address supported by the specified GPUs.
// The virtual address range specified by (base, length) must fall within a VA
// range previously created with UvmCreateExternalRange. A GPU VA space must
// have been registered for each GPU in the list. The (base, length) range must
// lie within the largest possible virtual address supported by the specified
// GPUs.
//
// The page size used for the mapping is the largest supported page size less
// than or equal to the alignments of base, length, offset, and the allocation
// page size.
//
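// For example (illustrative): if base, length, and offset are each aligned
// to 64KB but not to 2MB, the mapping is created with 64KB PTEs even when
// the physical allocation uses 2MB pages, assuming the GPU supports a 64KB
// page size.
//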
// If the range specified by (base, length) falls within any existing mappings,
// the behavior is the same as if UvmUnmapExternal with the range specified by
// (base, length) had been called first.
// (base, length) had been called first, provided that base and length are
// aligned to the page size used for the existing mappings.
//
// If the allocation resides in GPU memory, that GPU must have been registered
// via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
@@ -1838,8 +1857,9 @@ NV_STATUS UvmCreateExternalRange(void *base,
// - The requested address range does not fall entirely within an
// existing external VA range created with a single call to
// UvmCreateExternalRange.
// - At least one of base and length is not aligned to the allocation's
// physical page size.
// - The mapping page size allowed by the alignments of base, length,
// and offset is smaller than the minimum supported page size on the
// GPU.
// - base or base + length fall within an existing mapping but are not
// aligned to that mapping's page size.
//
@@ -1848,8 +1868,7 @@ NV_STATUS UvmCreateExternalRange(void *base,
// address supported by one or more of the specified GPUs.
//
// NV_ERR_INVALID_OFFSET:
// offset is not aligned to the allocation's physical page size or
// offset+length exceeds the allocation size.
// - offset+length exceeds the allocation size.
//
// NV_ERR_INVALID_DEVICE:
// One of the following occurred:
@@ -2214,11 +2233,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// supported by the specified processor.
//
// The virtual address range specified by (base, length) must have been
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input range is pageable memory and
// at least one GPU in the system supports transparent access to pageable
// memory, the behavior described below does not take effect and the preferred
// location of the pages in the given range does not change.
// allocated via a call to either UvmAlloc or UvmMemMap (managed memory), or be
// supported system-allocated pageable memory. If the input range corresponds to
// a file backed shared mapping and at least one GPU in the system supports
// transparent access to pageable memory, the behavior below is not guaranteed.
//
// If any pages in the VA range are associated with a range group that was made
// non-migratable via UvmPreventMigrationRangeGroups, then those pages are
@@ -2237,17 +2255,17 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// not cause a migration if a mapping for that page from that processor can be
// established without migrating the page.
//
// When a page migrates away from its preferred location, the mapping on the
// preferred location's processor is cleared so that the next access from that
// processor will cause a fault and migrate the page back to its preferred
// location. In other words, a page is mapped on the preferred location's
// processor only if the page is in its preferred location. Thus, when the
// preferred location changes, mappings to pages in the given range are removed
// from the new preferred location if the pages are resident in a different
// processor. Note that if the preferred location's processor is a GPU, then a
// mapping from that GPU to a page in the VA range is only created if a GPU VA
// space has been registered for that GPU and the page is in its preferred
// location.
// When a page that was allocated via either UvmAlloc or UvmMemMap migrates away
// from its preferred location, the mapping on the preferred location's
// processor is cleared so that the next access from that processor will cause a
// fault and migrate the page back to its preferred location. In other words, a
// page is mapped on the preferred location's processor only if the page is in
// its preferred location. Thus, when the preferred location changes, mappings
// to pages in the given range are removed from the new preferred location if
// the pages are resident in a different processor. Note that if the preferred
// location's processor is a GPU, then a mapping from that GPU to a page in the
// VA range is only created if a GPU VA space has been registered for that GPU
// and the page is in its preferred location.
//
// If read duplication has been enabled for any pages in this VA range and
// UvmPreventMigrationRangeGroups has not been called on the range group that
@@ -2260,7 +2278,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
//
// If the preferred location processor is present in the accessed-by list of any
// of the pages in this VA range, then the migration and mapping policies
// associated with associated with the accessed-by list.
// associated with this API override those associated with the accessed-by list.
//
// The state set by this API can be cleared either by calling
// UvmUnsetPreferredLocation for the same VA range or by calling
@@ -2281,35 +2299,66 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// preferredLocationUuid: (INPUT)
// UUID of the preferred location.
//
// preferredCpuNumaNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
// allowed by the global and thread memory policies. This argument is
// ignored if preferredLocationUuid refers to a GPU or the given virtual
// address range corresponds to managed memory. If NUMA is not enabled,
// only 0 or -1 is allowed.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
// base and length are not properly aligned, or the range does not
// represent a valid UVM allocation, or the range is pageable memory and
// the system does not support accessing pageable memory, or the range
// does not represent a supported Operating System allocation.
// One of the following occurred:
// - base and length are not properly aligned.
// - The range does not represent a valid UVM allocation.
// - The range is pageable memory and the system does not support
// accessing pageable memory.
// - The range does not represent a supported Operating System
// allocation.
//
// NV_ERR_OUT_OF_RANGE:
// The VA range exceeds the largest virtual address supported by the
// specified processor.
//
// NV_ERR_INVALID_DEVICE:
// preferredLocationUuid is neither the UUID of the CPU nor the UUID of
// a GPU that was registered by this process. Or at least one page in
// VA range belongs to a non-migratable range group and the specified
// UUID represents a fault-capable GPU. Or preferredLocationUuid is the
// UUID of a non-fault-capable GPU and at least one page in the VA range
// belongs to a non-migratable range group and another non-fault-capable
// GPU is in the accessed-by list of the same page but P2P support
// between both GPUs has not been enabled.
// One of the following occurred:
// - preferredLocationUuid is neither the UUID of the CPU nor the UUID
// of a GPU that was registered by this process.
// - At least one page in VA range belongs to a non-migratable range
// group and the specified UUID represents a fault-capable GPU.
// - preferredLocationUuid is the UUID of a non-fault-capable GPU and at
// least one page in the VA range belongs to a non-migratable range
// group and another non-fault-capable GPU is in the accessed-by list
// of the same page but P2P support between both GPUs has not been
// enabled.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the following occurred:
// - preferredLocationUuid is the UUID of a CPU and preferredCpuNumaNode
// refers to a registered GPU.
// - preferredCpuNumaNode is invalid and preferredLocationUuid is the
// UUID of the CPU.
//
// NV_ERR_NOT_SUPPORTED:
// The UVM file descriptor is associated with another process and the
// input virtual range corresponds to system-allocated pageable memory.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
// because it is not very informative.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(7)
NV_STATUS UvmSetPreferredLocation(void *base,
NvLength length,
const NvProcessorUuid *preferredLocationUuid);
#else
NV_STATUS UvmSetPreferredLocation(void *base,
NvLength length,
const NvProcessorUuid *preferredLocationUuid,
NvS32 preferredCpuNumaNode);
#endif
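A hedged usage sketch of the revision-8 declaration above, preferring CPU memory without pinning a single NUMA node (cpuUuid, base, and length are assumed to be set up by the caller):
// Illustrative call against the UVM_API_REVISION >= 8 signature.
NV_STATUS status = UvmSetPreferredLocation(base,
                                           length,
                                           &cpuUuid,
                                           -1 /* any CPU node allowed by policy */);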
//------------------------------------------------------------------------------
// UvmUnsetPreferredLocation
@@ -2323,10 +2372,9 @@ NV_STATUS UvmSetPreferredLocation(void *base,
//
// The virtual address range specified by (base, length) must have been
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input range is pageable memory and
// at least one GPU in the system supports transparent access to pageable
// memory, the behavior described below does not take effect and the preferred
// location of the pages in the given range does not change.
// system-allocated pageable memory. If the input range corresponds to a file
// backed shared mapping and at least one GPU in the system supports transparent
// access to pageable memory, the behavior below is not guaranteed.
//
// If the VA range is associated with a non-migratable range group, then that
// association is cleared. i.e. the pages in this VA range have their range
@@ -2345,10 +2393,18 @@ NV_STATUS UvmSetPreferredLocation(void *base,
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
// base and length are not properly aligned or the range does not
// represent a valid UVM allocation, or the range is pageable memory and
// the system does not support accessing pageable memory, or the range
// does not represent a supported Operating System allocation.
// One of the following occurred:
// - base and length are not properly aligned or the range does not
// represent a valid UVM allocation.
// - The range is pageable memory and the system does not support
// accessing pageable memory.
// - The range does not represent a supported Operating System
// allocation.
// - The range contains both managed and pageable memory allocations.
//
// NV_ERR_NOT_SUPPORTED:
// The UVM file descriptor is associated with another process and the
// input virtual range corresponds to system-allocated pageable memory.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
@@ -2629,13 +2685,34 @@ NV_STATUS UvmDisableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
// NV_ERR_INVALID_STATE:
// UVM was not initialized before calling this function.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
// because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmGetFileDescriptor(UvmFileDescriptor *returnedFd);
//------------------------------------------------------------------------------
// UvmGetMmFileDescriptor
//
// Returns the UVM file descriptor currently being used to keep the
// memory management context valid. The data type of the returned file
// descriptor is platform specific.
//
// If UvmInitialize has not yet been called, an error is returned.
//
// Arguments:
// returnedFd: (OUTPUT)
// A platform specific file descriptor.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// returnedFd is NULL.
//
// NV_ERR_INVALID_STATE:
// UVM was not initialized before calling this function.
//
// NV_ERR_NOT_SUPPORTED:
// This file descriptor is not required on this platform.
//------------------------------------------------------------------------------
NV_STATUS UvmGetMmFileDescriptor(UvmFileDescriptor *returnedFd);
//------------------------------------------------------------------------------
// UvmIs8Supported
//

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -49,11 +49,13 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
// A single top level PDE on Ada covers 128 TB and that's the minimum size
// that can be used.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
parent_gpu->ce_phys_vidmem_write_supported = true;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// Not all units on Ada support 49-bit addressing, including those which
@@ -92,4 +94,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}
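The UVM_SIZE_1TB substitutions above are arithmetic-preserving; for the replaced literals to match, the macro must expand to one tebibyte:
// Implied by the diff: 128 * UVM_SIZE_1TB == 128ull * 1024 * 1024 * 1024 * 1024,
// so UVM_SIZE_1TB is presumably defined (elsewhere in the tree) as:
#define UVM_SIZE_1TB (1024ull * 1024 * 1024 * 1024)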

View File

@@ -47,14 +47,16 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
// A single top level PDE on Ampere covers 128 TB and that's the minimum
// size that can be used.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->flat_vidmem_va_base = 136 * UVM_SIZE_1TB;
parent_gpu->flat_sysmem_va_base = 256 * UVM_SIZE_1TB;
parent_gpu->ce_phys_vidmem_write_supported = true;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
@@ -99,4 +101,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}

View File

@@ -27,7 +27,7 @@
#include "clc7b5.h"
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
if (!uvm_channel_is_proxy(push->channel))
return true;
@@ -112,7 +112,7 @@ NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
}
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU64 push_begin_gpu_va;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
@@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
return true;
if (uvm_channel_is_proxy(push->channel)) {
uvm_pushbuffer_t *pushbuffer;
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
return false;
@@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
return false;
}
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
@@ -177,13 +180,19 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
// irrespective of the virtualization mode.
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
{
uvm_pushbuffer_t *pushbuffer;
if (!uvm_channel_is_proxy(push->channel))
return;
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
}
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
uvm_gpu_address_t dst,
size_t num_elements,
size_t element_size)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);

View File

@@ -29,7 +29,7 @@
#include "clc56f.h"
#include "clc076.h"
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
@@ -82,7 +82,7 @@ bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address,
return true;
}
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
if (!uvm_channel_is_proxy(push->channel))
return true;

View File

@@ -25,6 +25,7 @@
#define __UVM_API_H__
#include "uvm_types.h"
#include "uvm_common.h"
#include "uvm_ioctl.h"
#include "uvm_linux.h"
#include "uvm_lock.h"
@@ -51,8 +52,10 @@
\
params.rmStatus = uvm_global_get_status(); \
if (params.rmStatus == NV_OK) { \
if (do_init_check) \
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
if (do_init_check) { \
if (!uvm_fd_va_space(filp)) \
params.rmStatus = NV_ERR_ILLEGAL_ACTION; \
} \
if (likely(params.rmStatus == NV_OK)) \
params.rmStatus = function_name(&params, filp); \
} \
@@ -88,8 +91,10 @@
\
params->rmStatus = uvm_global_get_status(); \
if (params->rmStatus == NV_OK) { \
if (do_init_check) \
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
if (do_init_check) { \
if (!uvm_fd_va_space(filp)) \
params->rmStatus = NV_ERR_ILLEGAL_ACTION; \
} \
if (likely(params->rmStatus == NV_OK)) \
params->rmStatus = function_name(params, filp); \
} \
@@ -196,21 +201,20 @@ static bool uvm_api_range_invalid_64k(NvU64 base, NvU64 length)
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_64K);
}
// Returns true if the interval [start, start + length - 1] is entirely covered
// by vmas.
//
// LOCKING: mm->mmap_lock must be held in at least read mode.
bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 length);
typedef enum
{
UVM_API_RANGE_TYPE_MANAGED,
UVM_API_RANGE_TYPE_HMM,
UVM_API_RANGE_TYPE_ATS,
UVM_API_RANGE_TYPE_INVALID
} uvm_api_range_type_t;
// Check that the interval [base, base + length) is fully covered by UVM
// managed ranges (NV_OK is returned), or (if ATS is enabled and mm != NULL)
// fully covered by valid vmas (NV_WARN_NOTHING_TO_DO is returned), or (if HMM
// is enabled and mm != NULL) fully covered by valid vmas (NV_OK is returned).
// Any other input results in a return status of NV_ERR_INVALID_ADDRESS.
// If the interval [base, base + length) is fully covered by VMAs which all have
// the same uvm_api_range_type_t, that range type is returned. Otherwise,
// UVM_API_RANGE_TYPE_INVALID is returned.
//
// LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
// mm->mmap_lock must also be held in at least read mode.
NV_STATUS uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *filp);

View File

@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_init_va_space(va_space);
}
@@ -57,6 +59,10 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
return uvm_ats_ibm_add_gpu(parent_gpu);
}
else if (UVM_ATS_SVA_SUPPORTED()) {
if (g_uvm_global.ats.enabled)
return uvm_ats_sva_add_gpu(parent_gpu);
}
return NV_OK;
}
@@ -71,6 +77,10 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
uvm_ats_ibm_remove_gpu(parent_gpu);
}
else if (UVM_ATS_SVA_SUPPORTED()) {
if (g_uvm_global.ats.enabled)
uvm_ats_sva_remove_gpu(parent_gpu);
}
}
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -87,6 +97,8 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
status = uvm_ats_sva_bind_gpu(gpu_va_space);
return status;
}
@@ -100,6 +112,8 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unbind_gpu(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
uvm_ats_sva_unbind_gpu(gpu_va_space);
}
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -126,6 +140,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
if (status == NV_OK)
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@@ -148,6 +164,8 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
uvm_va_space_down_write(va_space);
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);

View File

@@ -28,8 +28,11 @@
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
#include "uvm_ats_sva.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
typedef struct
{
@@ -37,10 +40,15 @@ typedef struct
// indexed by gpu->id. This mask is protected by the VA space lock.
uvm_processor_mask_t registered_gpu_va_spaces;
// Protects racing invalidates in the VA space while hmm_range_fault() is
// being called in ats_compute_residency_mask().
uvm_rw_semaphore_t lock;
union
{
uvm_ibm_va_space_t ibm;
uvm_sva_va_space_t sva;
};
} uvm_ats_va_space_t;
@@ -58,6 +66,7 @@ typedef struct
{
uvm_ibm_gpu_va_space_t ibm;
uvm_sva_gpu_va_space_t sva;
};
} uvm_ats_gpu_va_space_t;
@@ -90,6 +99,8 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
// LOCKING: mmap_lock must be lockable.
// VA space lock must be lockable.
// gpu_va_space->gpu must be retained.
// mm must be retained with uvm_va_space_mm_retain() iff
// UVM_ATS_SVA_SUPPORTED() is 1
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018 NVIDIA Corporation
Copyright (c) 2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -20,21 +20,46 @@
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_tools.h"
#include "uvm_va_range.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
NvU64 fault_addr,
uvm_fault_access_type_t access_type)
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>
#endif
typedef enum
{
UVM_ATS_SERVICE_TYPE_FAULTS = 0,
UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,
UVM_ATS_SERVICE_TYPE_COUNT
} uvm_ats_service_type_t;
static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 start,
size_t length,
uvm_fault_access_type_t access_type,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
NV_STATUS status;
NvU64 start;
NvU64 length;
NvU64 user_space_start;
NvU64 user_space_length;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
uvm_populate_permissions_t populate_permissions = fault_service_type ?
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
UVM_POPULATE_PERMISSIONS_INHERIT;
// Request uvm_migrate_pageable() to touch the corresponding page after
// population.
@@ -43,17 +68,18 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
// 2) guest physical -> host physical
//
// The overall ATS translation will fault if either of those translations is
// invalid. The get_user_pages() call above handles translation #1, but not
// #2. We don't know if we're running as a guest, but in case we are we can
// force that translation to be valid by touching the guest physical address
// from the CPU. If the translation is not valid then the access will cause
// a hypervisor fault. Note that dma_map_page() can't establish mappings
// used by GPU ATS SVA translations. GPU accesses to host physical addresses
// obtained as a result of the address translation request uses the CPU
// address space instead of the IOMMU address space since the translated
// host physical address isn't necessarily an IOMMU address. The only way to
// establish guest physical to host physical mapping in the CPU address
// space is to touch the page from the CPU.
// invalid. The pin_user_pages() call within uvm_migrate_pageable() call
// below handles translation #1, but not #2. We don't know if we're running
// as a guest, but in case we are we can force that translation to be valid
// by touching the guest physical address from the CPU. If the translation
// is not valid then the access will cause a hypervisor fault. Note that
// dma_map_page() can't establish mappings used by GPU ATS SVA translations.
// GPU accesses to host physical addresses obtained as a result of the
// address translation request use the CPU address space instead of the
// IOMMU address space since the translated host physical address isn't
// necessarily an IOMMU address. The only way to establish guest physical to
// host physical mapping in the CPU address space is to touch the page from
// the CPU.
//
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
// VM_WRITE, meaning that the mappings are all granted write access on any
@@ -64,23 +90,22 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
uvm_migrate_args_t uvm_migrate_args =
{
.va_space = va_space,
.mm = mm,
.start = fault_addr,
.length = PAGE_SIZE,
.dst_id = gpu_va_space->gpu->parent->id,
.dst_node_id = -1,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
.user_space_start = &start,
.user_space_length = &length,
.va_space = va_space,
.mm = mm,
.dst_id = ats_context->residency_id,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = populate_permissions,
.touch = fault_service_type,
.skip_mapped = fault_service_type,
.populate_on_cpu_alloc_failures = fault_service_type,
.user_space_start = &user_space_start,
.user_space_length = &user_space_length,
};
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
// TODO: Bug 2103669: Service more than a single fault at a time
//
// We are trying to use migrate_vma API in the kernel (if it exists) to
// populate and map the faulting region on the GPU. We want to do this only
// on the first touch. That is, pages which are not already mapped. So, we
@@ -95,110 +120,475 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *current_entry,
uvm_ats_fault_invalidate_t *ats_invalidate)
static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
NvU64 addr,
size_t size,
uvm_fault_client_type_t client_type)
{
uvm_ats_fault_invalidate_t *ats_invalidate;
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
ats_invalidate->tlb_batch_pending = true;
}
uvm_tlb_batch_invalidate(&ats_invalidate->tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
}
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context)
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
int residency = uvm_gpu_numa_node(gpu);
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
struct mempolicy *vma_policy = vma_policy(vma);
unsigned short mode;
ats_context->prefetch_state.has_preferred_location = false;
// It's safe to read vma_policy since the mmap_lock is held in at least read
// mode in this path.
uvm_assert_mmap_lock_locked(vma->vm_mm);
if (!vma_policy)
goto done;
mode = vma_policy->mode;
if ((mode == MPOL_BIND)
#if defined(NV_MPOL_PREFERRED_MANY_PRESENT)
|| (mode == MPOL_PREFERRED_MANY)
#endif
|| (mode == MPOL_PREFERRED)) {
int home_node = NUMA_NO_NODE;
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
if ((mode != MPOL_PREFERRED) && (vma_policy->home_node != NUMA_NO_NODE))
home_node = vma_policy->home_node;
#endif
// Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
// in the list of preferred nodes, else prefer the closest_cpu_numa_node
// to the GPU if closest_cpu_numa_node is in the list of preferred
// nodes. Fall back to the faulting GPU if all else fails.
if (home_node != NUMA_NO_NODE) {
residency = home_node;
}
else if (!node_isset(residency, vma_policy->nodes)) {
int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
residency = gpu->parent->closest_cpu_numa_node;
else
residency = first_node(vma_policy->nodes);
}
if (!nodes_empty(vma_policy->nodes))
ats_context->prefetch_state.has_preferred_location = true;
}
// Update gpu if residency is not the faulting gpu.
if (residency != uvm_gpu_numa_node(gpu))
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
done:
#else
ats_context->prefetch_state.has_preferred_location = false;
#endif
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
ats_context->residency_node = residency;
}
static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
{
*start = max(vma->vm_start, (unsigned long) base);
*end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
}
static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
{
UVM_ASSERT(addr >= base);
UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
return (addr - base) / PAGE_SIZE;
}
// start and end must be aligned to PAGE_SIZE and must fall within
// [base, base + UVM_VA_BLOCK_SIZE]
static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
{
// base can be greater than, less than or equal to the start of a VMA.
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
UVM_ASSERT(start < end);
UVM_ASSERT(PAGE_ALIGNED(start));
UVM_ASSERT(PAGE_ALIGNED(end));
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
}
static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
{
NvU64 start;
NvU64 end;
get_range_in_vma(vma, base, &start, &end);
return uvm_ats_region_from_start_end(start, end);
}
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
// The following write lock protects against concurrent invalidates while
// hmm_range_fault() is being called in ats_compute_residency_mask().
uvm_down_write(&va_space->ats.lock);
mmu_interval_set_seq(mni, cur_seq);
uvm_up_write(&va_space->ats.lock);
return true;
}
static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
}
static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
{
.invalidate = uvm_ats_invalidate_notifier_entry,
};
#endif
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
NvU64 gmmu_region_base;
bool in_gmmu_region;
NV_STATUS status = NV_OK;
uvm_fault_access_type_t service_access_type;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
UVM_ASSERT(current_entry->fault_access_type ==
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
uvm_assert_rwsem_locked_read(&va_space->lock);
service_access_type = current_entry->fault_access_type;
ats_context->prefetch_state.first_touch = true;
// ATS lookups are disabled on all addresses within the same
// UVM_GMMU_ATS_GRANULARITY as existing GMMU mappings (see documentation in
// uvm_mmu.h). User mode is supposed to reserve VAs as appropriate to
// prevent any system memory allocations from falling within the NO_ATS
// range of other GMMU mappings, so this shouldn't happen during normal
// operation. However, since this scenario may lead to infinite fault loops,
// we handle it by canceling the fault.
//
// TODO: Bug 2103669: Remove redundant VA range lookups
gmmu_region_base = UVM_ALIGN_DOWN(current_entry->fault_address, UVM_GMMU_ATS_GRANULARITY);
in_gmmu_region = !uvm_va_space_range_empty(current_entry->va_space,
gmmu_region_base,
gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1);
if (in_gmmu_region) {
status = NV_ERR_INVALID_ADDRESS;
}
else {
// TODO: Bug 2103669: Service more than a single fault at a time
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
uvm_page_mask_zero(residency_mask);
get_range_in_vma(vma, base, &start, &end);
vma_region = uvm_ats_region_from_start_end(start, end);
range.notifier = &ats_context->prefetch_state.notifier;
range.start = start;
range.end = end;
range.hmm_pfns = ats_context->prefetch_state.pfns;
range.default_flags = 0;
range.pfn_flags_mask = 0;
range.dev_private_owner = NULL;
ats_context->prefetch_state.va_space = va_space;
// mmu_interval_notifier_insert() will try to acquire mmap_lock for write
// and will deadlock since mmap_lock is already held for read in this path.
// This is prevented by calling __mmu_notifier_register() during va_space
// creation. See the comment in uvm_mmu_notifier_register() for more
// details.
ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
if (ret)
return errno_to_nv_status(ret);
while (true) {
range.notifier_seq = mmu_interval_read_begin(range.notifier);
ret = hmm_range_fault(&range);
if (ret == -EBUSY)
continue;
if (ret) {
status = errno_to_nv_status(ret);
UVM_ASSERT(status != NV_OK);
break;
}
uvm_down_read(&va_space->ats.lock);
// Pages may have been freed or re-allocated after hmm_range_fault() is
// called. So the PTE might point to a different page or nothing. In the
// memory hot-unplug case it is not safe to call page_to_nid() on the
// page as the struct page itself may have been freed. To protect
// against these cases, uvm_ats_invalidate_entry() blocks on va_space
// ATS write lock for concurrent invalidates since va_space ATS lock is
// held for read in this path.
if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
break;
uvm_up_read(&va_space->ats.lock);
}
// Do not flag prefetch faults as fatal unless something fatal happened
if (status == NV_ERR_INVALID_ADDRESS) {
if (current_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH) {
current_entry->is_fatal = true;
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
if (status == NV_OK) {
for_each_va_block_page_in_region(page_index, vma_region) {
unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
// Compute cancel mode for replayable faults
if (current_entry->is_replayable) {
if (service_access_type == UVM_FAULT_ACCESS_TYPE_READ || in_gmmu_region)
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
else
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC;
if (pfn & HMM_PFN_VALID) {
struct page *page = hmm_pfn_to_page(pfn);
// If there are pending read accesses on the same page, we have to
// service them before we can cancel the write/atomic faults. So we
// retry with read fault access type.
if (!in_gmmu_region &&
current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ &&
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
status = uvm_ats_service_fault(gpu_va_space,
current_entry->fault_address,
UVM_FAULT_ACCESS_TYPE_READ);
if (page_to_nid(page) == ats_context->residency_node)
uvm_page_mask_set(residency_mask, page_index);
// If read accesses are also invalid, cancel the fault. If a
// different error code is returned, exit
if (status == NV_ERR_INVALID_ADDRESS)
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
else if (status != NV_OK)
return status;
}
ats_context->prefetch_state.first_touch = false;
}
}
else {
current_entry->is_invalid_prefetch = true;
}
// Do not fail overall fault servicing due to logical errors
status = NV_OK;
uvm_up_read(&va_space->ats.lock);
}
// The Linux kernel never invalidates TLB entries on mapping permission
// upgrade. This is a problem if the GPU has cached entries with the old
// permission. The GPU will re-fetch the entry if the PTE is invalid and
// page size is not 4K (this is the case on P9). However, if a page gets
// upgraded from R/O to R/W and GPU has the PTEs cached with R/O
// permissions we will enter an infinite loop because we just forward the
// fault to the Linux kernel and it will see that the permissions in the
// page table are correct. Therefore, we flush TLB entries on ATS write
// faults.
if (!current_entry->is_fatal && current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ) {
if (!ats_invalidate->write_faults_in_batch) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
ats_invalidate->write_faults_in_batch = true;
}
mmu_interval_notifier_remove(range.notifier);
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch,
current_entry->fault_address,
PAGE_SIZE,
PAGE_SIZE,
UVM_MEMBAR_NONE);
#else
uvm_page_mask_zero(residency_mask);
#endif
return status;
}
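The while (true) loop in ats_compute_residency_mask() is the standard mmu_interval_notifier sequence-count idiom around hmm_range_fault(); a condensed sketch of just that skeleton (kernel-internal APIs, locking elided):
// Schematic retry skeleton; the real code above also takes va_space->ats.lock
// around the retry check so racing invalidates serialize with the walk.
for (;;) {
    range.notifier_seq = mmu_interval_read_begin(range.notifier);
    ret = hmm_range_fault(&range);
    if (ret == -EBUSY)
        continue;                   // raced with an invalidate; restart the walk
    if (ret)
        return errno_to_nv_status(ret);
    if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
        break;                      // sequence unchanged: snapshot is stable
}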
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region)
{
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
if (uvm_page_mask_empty(accessed_mask))
return;
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
accessed_mask,
uvm_va_block_region_from_mask(NULL, accessed_mask),
max_prefetch_region,
residency_mask,
bitmap_tree,
prefetch_mask);
}
static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
// Residency mask needs to be computed even if prefetching is disabled since
// the residency information is also needed by access counters servicing in
// uvm_ats_service_access_counters()
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(accessed_mask))
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
(ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
}
else {
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
}
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
}
else {
uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
}
return status;
}
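// Illustrative effect of the fault-path branch above (hypothetical masks over
// an 8-page region, 1 bit per page): with prefetch_pages_mask = 0b00111100
// and read_fault_mask = 0b00000011, the OR leaves read_fault_mask =
// 0b00111111, so the prefetched pages are serviced as if they had faulted for
// read; likewise for write_fault_mask when the VMA is writable.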
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_va_block_region_t subregion;
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_fault_client_type_t client_type = ats_context->client_type;
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
uvm_page_mask_zero(faults_serviced_mask);
uvm_page_mask_zero(reads_serviced_mask);
if (!(vma->vm_flags & VM_READ))
return status;
if (!(vma->vm_flags & VM_WRITE)) {
// If VMA doesn't have write permissions, all write faults are fatal.
// Try servicing such faults for read iff they are also present in
// read_fault_mask. This is because for replayable faults, if there are
// pending read accesses on the same page, we have to service them
// before we can cancel the write/atomic faults. So we try with read
// fault access type even though these write faults are fatal.
if (ats_context->client_type == UVM_FAULT_CLIENT_TYPE_GPC)
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
else
uvm_page_mask_zero(write_fault_mask);
// There are no pending faults beyond write faults to RO region.
if (uvm_page_mask_empty(read_fault_mask))
return status;
}
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
UVM_FAULT_ACCESS_TYPE_WRITE :
UVM_FAULT_ACCESS_TYPE_READ;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
if (vma->vm_flags & VM_WRITE) {
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
            // The Linux kernel never invalidates TLB entries on mapping
            // permission upgrade. This is a problem if the GPU has cached
            // entries with the old permission. The GPU will re-fetch the entry
            // if the PTE is invalid and the page size is not 4K (this is the
            // case on P9). However, if a page gets upgraded from R/O to R/W
            // and the GPU has the PTEs cached with R/O permissions, we will
            // enter an infinite loop because we just forward the fault to the
            // Linux kernel and it will see that the permissions in the page
            // table are correct. Therefore, we flush TLB entries on ATS write
            // faults.
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
else {
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
}
}
// Remove write faults from read_fault_mask
uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);
for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
        // Similarly to the permission upgrade scenario discussed above, the
        // GPU will not re-fetch the entry if the PTE is invalid and the page
        // size is 4K. To avoid an infinite faulting loop, invalidate the TLB
        // for every new translation written explicitly, as in the permission
        // upgrade case.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
return status;
}
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next)
{
uvm_va_range_t *prev;
NvU64 gmmu_region_base = UVM_ALIGN_DOWN(address, UVM_GMMU_ATS_GRANULARITY);
UVM_ASSERT(va_space);
if (next) {
if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
return true;
prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
}
else {
// No VA range exists after address, so check the last VA range in the
// tree.
prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
}
return prev && (prev->node.end >= gmmu_region_base);
}
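// Worked example (hypothetical granularity; assume UVM_GMMU_ATS_GRANULARITY
// is 512MB): for address 0x21000000, gmmu_region_base is
// UVM_ALIGN_DOWN(0x21000000, 512MB) == 0x20000000 and the region spans
// [0x20000000, 0x40000000). The function returns true iff some VA range
// overlaps that region: either the next VA range starts at or below
// 0x3FFFFFFF, or the previous one ends at or above 0x20000000.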
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker)
@@ -206,7 +596,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
NV_STATUS status;
uvm_push_t push;
if (!ats_invalidate->write_faults_in_batch)
if (!ats_invalidate->tlb_batch_pending)
return NV_OK;
UVM_ASSERT(gpu_va_space);
@@ -218,7 +608,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
"Invalidate ATS entries");
if (status == NV_OK) {
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_tlb_batch_end(&ats_invalidate->tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_push_end(&push);
// Add this push to the GPU's tracker so that fault replays/clears can
@@ -226,7 +616,57 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
status = uvm_tracker_add_push_safe(out_tracker, &push);
}
ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;
return status;
}
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
uvm_va_block_region_t subregion;
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
ats_batch_select_residency(gpu_va_space, vma, ats_context);
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
// is just an optimization and servicing access counter migrations is still
// worthwhile even without any prefetching added. So, let servicing continue
// instead of returning early even if the prefetch computation fails.
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
// Remove pages which are already resident at the intended destination from
// the accessed_mask.
uvm_page_mask_andnot(&ats_context->accessed_mask,
&ats_context->accessed_mask,
&ats_context->prefetch_state.residency_mask);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
}
return NV_OK;
}

View File

@@ -25,13 +25,54 @@
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
#include "uvm_gpu.h"
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *current_entry,
uvm_ats_fault_invalidate_t *ats_invalidate);
// Service ATS faults in the range [base, base + UVM_VA_BLOCK_SIZE) with the
// given service type for the individual pages requested by the page masks set
// in ats_context->read_fault_mask/write_fault_mask. base must be aligned to
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that the faulting
// addresses fall completely within the VMA, that they don't overlap a GMMU
// region (see uvm_ats_check_in_gmmu_region()), and for handling any errors
// returned by this function (fault cancellations etc.).
//
// Returns the fault service status in ats_context->faults_serviced_mask. In
// addition, ats_context->reads_serviced_mask indicates whether read servicing
// worked on a write fault iff read service was also requested in the
// corresponding bit of read_fault_mask. These returned masks are only valid if
// the return status is NV_OK; any other status indicates a system-global fault
// servicing failure.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
// lock.
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context);
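// Illustrative call sequence (a sketch only; assumes the caller's
// fault-servicing path has already looked up the vma and populated
// ats_context, and uses page_index as a stand-in for a real faulting page):
//
//     uvm_page_mask_zero(&ats_context->read_fault_mask);
//     uvm_page_mask_set(&ats_context->read_fault_mask, page_index);
//
//     status = uvm_ats_service_faults(gpu_va_space, vma, base, ats_context);
//     if (status == NV_OK &&
//         uvm_page_mask_test(&ats_context->faults_serviced_mask, page_index)) {
//         // The read fault at base + page_index * PAGE_SIZE was serviced.
//     }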
// Service access counter notifications on ATS regions in the range [base, base
// + UVM_VA_BLOCK_SIZE) for the individual pages requested by the page mask set
// in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
// The caller is responsible for ensuring that the addresses in the
// accessed_mask are completely covered by the VMA, and for handling any errors
// returned by this function.
//
// Returns NV_OK if servicing was successful. Any other error indicates an error
// while servicing the range.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
// lock.
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context);
// Return whether there are any VA ranges (and thus GMMU mappings) within the
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
// This function performs pending TLB invalidations for ATS and clears the
// ats_invalidate->write_faults_in_batch flag
// ats_invalidate->tlb_batch_pending flag
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker);

View File

@@ -0,0 +1,427 @@
/*******************************************************************************
Copyright (c) 2018-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_ats_sva.h"
#if UVM_ATS_SVA_SUPPORTED()
#include "uvm_gpu.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include <asm/io.h>
#include <linux/log2.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>
#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/mmu_context.h>
// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
// reference required for the iommu_sva_bind_device() call. This header is not
// present in all the supported versions. Instead of adding a conftest just for
// this header file, use UVM_ATS_SVA_SUPPORTED().
#include <linux/sched/mm.h>
// iommu_sva_bind_device() removed the drvdata parameter with commit
// 942fd5435dccb273f90176b046ae6bbba60cfbd8 (10/31/2022).
#if defined(NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG)
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm, NULL)
#else
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
#endif
// Type to represent a 128-bit SMMU command queue command.
struct smmu_cmd {
NvU64 low;
NvU64 high;
};
// Base address of SMMU CMDQ-V for GSMMU0.
#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
#define SMMU_CMDQV_BASE_LEN 0x00830000
// CMDQV configuration is done by firmware but we check status here.
#define SMMU_CMDQV_CONFIG 0x0
#define SMMU_CMDQV_CONFIG_CMDQV_EN BIT(0)
// Used to map a particular VCMDQ to a VINTF.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP(vcmdq_id) (0x200 + 0x4 * (vcmdq_id))
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC BIT(0)
// Shift for the field containing the index of the virtual interface
// owning the VCMDQ.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT 15
// Base address for the VINTF registers.
#define SMMU_VINTF_BASE_ADDR(cmdqv_base_addr, vintf_id) (cmdqv_base_addr + 0x1000 + 0x100 * (vintf_id))
// Virtual interface (VINTF) configuration registers. The WAR only
// works on baremetal so we need to configure ourselves as the
// hypervisor owner.
#define SMMU_VINTF_CONFIG 0x0
#define SMMU_VINTF_CONFIG_ENABLE BIT(0)
#define SMMU_VINTF_CONFIG_HYP_OWN BIT(17)
#define SMMU_VINTF_STATUS 0x0
#define SMMU_VINTF_STATUS_ENABLED BIT(0)
// Calculates the base address for a particular VCMDQ instance.
#define SMMU_VCMDQ_BASE_ADDR(cmdqv_base_addr, vcmdq_id) (cmdqv_base_addr + 0x10000 + 0x80 * (vcmdq_id))
// SMMU command queue consumer index register. Updated by SMMU
// when commands are consumed.
#define SMMU_VCMDQ_CONS 0x0
// SMMU command queue producer index register. Updated by UVM when
// commands are added to the queue.
#define SMMU_VCMDQ_PROD 0x4
// Configuration register used to enable a VCMDQ.
#define SMMU_VCMDQ_CONFIG 0x8
#define SMMU_VCMDQ_CONFIG_ENABLE BIT(0)
// Status register used to check the VCMDQ is enabled.
#define SMMU_VCMDQ_STATUS 0xc
#define SMMU_VCMDQ_STATUS_ENABLED BIT(0)
// Base address offset for the VCMDQ registers.
#define SMMU_VCMDQ_CMDQ_BASE 0x10000
// Size of the command queue. Each command is 16 bytes and we can't
// have a command queue greater than one page in size.
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE (PAGE_SHIFT - ilog2(sizeof(struct smmu_cmd)))
#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)
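// Worked example, assuming a 4K kernel page size (PAGE_SHIFT == 12): each
// command is sizeof(struct smmu_cmd) == 16 bytes, so
// SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE == 12 - 4 == 8 and the queue holds
// SMMU_VCMDQ_CMDQ_ENTRIES == 256 commands, i.e. 256 * 16 == 4096 bytes ==
// exactly one page.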
// We always use VINTF63 for the WAR
#define VINTF 63
static void smmu_vintf_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}
static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}
// We always use VCMDQ127 for the WAR
#define VCMDQ 127
static void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT___iowrite64_lo_hi
__iowrite64_lo_hi(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#else
iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#endif
}
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
{
uvm_spin_loop_t spin;
NV_STATUS status;
unsigned long cmdqv_config;
void __iomem *smmu_cmdqv_base;
struct acpi_iort_node *node;
struct acpi_iort_smmu_v3 *iort_smmu;
node = *(struct acpi_iort_node **) dev_get_platdata(parent_gpu->pci_dev->dev.iommu->iommu_dev->dev->parent);
iort_smmu = (struct acpi_iort_smmu_v3 *) node->node_data;
smmu_cmdqv_base = ioremap(SMMU_CMDQV_BASE_ADDR(iort_smmu->base_address), SMMU_CMDQV_BASE_LEN);
if (!smmu_cmdqv_base)
return NV_ERR_NO_MEMORY;
parent_gpu->smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
cmdqv_config = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CONFIG);
if (!(cmdqv_config & SMMU_CMDQV_CONFIG_CMDQV_EN)) {
status = NV_ERR_OBJECT_NOT_FOUND;
goto out;
}
// Allocate SMMU CMDQ pages for WAR
parent_gpu->smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
if (!parent_gpu->smmu_war.smmu_cmdq) {
status = NV_ERR_NO_MEMORY;
goto out;
}
// Initialise VINTF for the WAR
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, SMMU_VINTF_CONFIG_ENABLE | SMMU_VINTF_CONFIG_HYP_OWN);
UVM_SPIN_WHILE(!(smmu_vintf_read32(smmu_cmdqv_base, SMMU_VINTF_STATUS) & SMMU_VINTF_STATUS_ENABLED), &spin);
// Allocate VCMDQ to VINTF
iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_PROD, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, SMMU_VCMDQ_CONFIG_ENABLE);
UVM_SPIN_WHILE(!(smmu_vcmdq_read32(smmu_cmdqv_base, SMMU_VCMDQ_STATUS) & SMMU_VCMDQ_STATUS_ENABLED), &spin);
uvm_mutex_init(&parent_gpu->smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
parent_gpu->smmu_war.smmu_prod = 0;
parent_gpu->smmu_war.smmu_cons = 0;
return NV_OK;
out:
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
parent_gpu->smmu_war.smmu_cmdqv_base = NULL;
return status;
}
static void uvm_ats_smmu_war_deinit(uvm_parent_gpu_t *parent_gpu)
{
void __iomem *smmu_cmdqv_base = parent_gpu->smmu_war.smmu_cmdqv_base;
NvU32 cmdq_alloc_map;
if (parent_gpu->smmu_war.smmu_cmdqv_base) {
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, 0);
cmdq_alloc_map = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
iowrite32(cmdq_alloc_map & SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC, smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, 0);
}
if (parent_gpu->smmu_war.smmu_cmdq)
__free_page(parent_gpu->smmu_war.smmu_cmdq);
if (parent_gpu->smmu_war.smmu_cmdqv_base)
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
}
// The SMMU on ARM64 can run under different translation regimes depending on
// what features the OS and CPU variant support. The CPU for GH180 supports
// virtualisation extensions and starts the kernel at EL2, meaning the SMMU
// operates under the NS-EL2-E2H translation regime. Therefore we need to use
// the TLBI_EL2_* commands, which invalidate TLB entries created under this
// translation regime.
#define CMDQ_OP_TLBI_EL2_ASID 0x21
#define CMDQ_OP_TLBI_EL2_VA 0x22
#define CMDQ_OP_CMD_SYNC 0x46
// Use the same maximum as used for MAX_TLBI_OPS in the upstream
// kernel.
#define UVM_MAX_TLBI_OPS (1UL << (PAGE_SHIFT - 3))
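// Worked example, assuming PAGE_SHIFT == 12: UVM_MAX_TLBI_OPS == 1UL << 9 ==
// 512 pages (2MB). uvm_ats_smmu_invalidate_tlbs() below issues per-VA
// invalidates only up to min(UVM_MAX_TLBI_OPS, SMMU_VCMDQ_CMDQ_ENTRIES - 1)
// pages and otherwise falls back to invalidating the whole ASID.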
#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
uvm_parent_gpu_t *parent_gpu = gpu_va_space->gpu->parent;
struct {
NvU64 low;
NvU64 high;
} *vcmdq;
unsigned long vcmdq_prod;
NvU64 end;
uvm_spin_loop_t spin;
NvU16 asid;
if (!parent_gpu->smmu_war.smmu_cmdqv_base)
return;
asid = arm64_mm_context_get(mm);
vcmdq = kmap(parent_gpu->smmu_war.smmu_cmdq);
uvm_mutex_lock(&parent_gpu->smmu_war.smmu_lock);
vcmdq_prod = parent_gpu->smmu_war.smmu_prod;
// Our queue management is very simple. The mutex prevents multiple
// producers writing to the queue and all our commands require waiting for
// the queue to drain so we know it's empty. If we can't fit enough commands
// in the queue we just invalidate the whole ASID.
//
// The command queue is a circular buffer with the MSB representing a wrap
// bit that must toggle on each wrap. See the SMMU architecture
// specification for more details.
//
// SMMU_VCMDQ_CMDQ_ENTRIES - 1 because we need to leave space for the
// CMD_SYNC.
if ((size >> PAGE_SHIFT) > min(UVM_MAX_TLBI_OPS, SMMU_VCMDQ_CMDQ_ENTRIES - 1)) {
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_ASID;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0;
vcmdq_prod++;
}
else {
for (end = addr + size; addr < end; addr += PAGE_SIZE) {
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_VA;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = addr & ~((1UL << 12) - 1);
vcmdq_prod++;
}
}
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_CMD_SYNC;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0x0;
vcmdq_prod++;
// MSB is the wrap bit
vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1;
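    // Worked example, assuming SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE == 8 (256
    // entries): the producer index is kept modulo 512, so advancing from 255
    // to 256 keeps writing at slot 0 (256 % 256) while bit 8, the wrap bit,
    // flips from 0 to 1 as the SMMU architecture requires.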
parent_gpu->smmu_war.smmu_prod = vcmdq_prod;
smmu_vcmdq_write32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->smmu_war.smmu_prod);
UVM_SPIN_WHILE(
(smmu_vcmdq_read32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
&spin);
uvm_mutex_unlock(&parent_gpu->smmu_war.smmu_lock);
kunmap(parent_gpu->smmu_war.smmu_cmdq);
arm64_mm_context_put(mm);
}
#endif
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_enable_feature
int ret;
ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
if (ret)
return errno_to_nv_status(ret);
#endif
if (UVM_ATS_SMMU_WAR_REQUIRED())
return uvm_ats_smmu_war_init(parent_gpu);
else
return NV_OK;
}
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_SMMU_WAR_REQUIRED())
uvm_ats_smmu_war_deinit(parent_gpu);
#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_disable_feature
iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
#endif
}
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
NV_STATUS status = NV_OK;
struct iommu_sva *iommu_handle;
struct pci_dev *pci_dev = gpu_va_space->gpu->parent->pci_dev;
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
UVM_ASSERT(mm);
    // The mmput() below may trigger the kernel's mm teardown with exit_mmap().
    // uvm_va_space_mm_shutdown() and uvm_vm_close_managed() in that path will
    // try to grab the va_space lock and would deadlock if the va_space were
    // already locked.
uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
    // iommu_sva_bind_device() requires an mm reference to be held. The mm is
    // already retained, so it is still valid, but it may be inactive: mm_users
    // can be zero because UVM doesn't use mm_users and instead maintains a
    // separate refcount (retained_count) for the mm in va_space_mm. See the
    // block comment in va_space_mm.c for more details. So, return an error if
    // mm_users is zero.
if (!mmget_not_zero(mm))
return NV_ERR_PAGE_TABLE_NOT_AVAIL;
    // Multiple calls for the same {pci_dev, mm} pair are refcounted by the ARM
    // SMMU layer.
iommu_handle = UVM_IOMMU_SVA_BIND_DEVICE(&pci_dev->dev, mm);
if (IS_ERR(iommu_handle)) {
status = errno_to_nv_status(PTR_ERR(iommu_handle));
goto out;
}
// If this is not the first bind of the gpu in the mm, then the previously
// stored iommu_handle in the gpu_va_space must match the handle returned by
// iommu_sva_bind_device().
if (sva_gpu_va_space->iommu_handle) {
UVM_ASSERT(sva_gpu_va_space->iommu_handle == iommu_handle);
nv_kref_get(&sva_gpu_va_space->kref);
}
else {
sva_gpu_va_space->iommu_handle = iommu_handle;
nv_kref_init(&sva_gpu_va_space->kref);
}
out:
mmput(mm);
return status;
}
static void uvm_sva_reset_iommu_handle(nv_kref_t *nv_kref)
{
uvm_sva_gpu_va_space_t *sva_gpu_va_space = container_of(nv_kref, uvm_sva_gpu_va_space_t, kref);
sva_gpu_va_space->iommu_handle = NULL;
}
void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
// ARM SMMU layer decrements the refcount for the {pci_dev, mm} pair.
// The actual unbind happens only when the refcount reaches zero.
if (sva_gpu_va_space->iommu_handle) {
iommu_sva_unbind_device(sva_gpu_va_space->iommu_handle);
nv_kref_put(&sva_gpu_va_space->kref, uvm_sva_reset_iommu_handle);
}
}
NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
NvU32 pasid;
NV_STATUS status = NV_OK;
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
// A successful iommu_sva_bind_device() should have preceded this call.
UVM_ASSERT(sva_gpu_va_space->iommu_handle);
pasid = iommu_sva_get_pasid(sva_gpu_va_space->iommu_handle);
if (pasid == IOMMU_PASID_INVALID)
status = errno_to_nv_status(ENODEV);
else
gpu_va_space->ats.pasid = pasid;
return status;
}
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
gpu_va_space->ats.pasid = -1U;
}
#endif // UVM_ATS_SVA_SUPPORTED()

View File

@@ -0,0 +1,143 @@
/*******************************************************************************
Copyright (c) 2018-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_ATS_SVA_H__
#define __UVM_ATS_SVA_H__
#include "uvm_gpu.h"
#include "uvm_forward_decl.h"
#include <linux/iommu.h>
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
// conftest-able. We instead look for the presence of ioasid_get() or
// mm_pasid_drop(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_drop() was added in the
// same patch as the removal of ioasid_get(). We assume the presence of
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_drop(v5.18+) is
// present.
//
// arm_smmu_sva_bind() was added with commit
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
// and added mm_pasid_drop().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
#if defined(CONFIG_IOMMU_SVA)
#define UVM_ATS_SVA_SUPPORTED() 1
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined, it means the upstream fix
// is in place, so there is no need for the WAR from Bug 4130089: [GH180][r535]
// WAR for kernel not issuing SMMU TLB invalidates on read-only to read-write
// upgrades.
#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#elif NVCPU_IS_AARCH64
#define UVM_ATS_SMMU_WAR_REQUIRED() 1
#else
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#endif
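// In effect, the WAR is compiled in only for aarch64 kernels that lack the
// upstream arch_invalidate_secondary_tlbs() fix; on other architectures, and
// on aarch64 kernels that have the fix, it compiles away.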
typedef struct
{
int placeholder;
} uvm_sva_va_space_t;
typedef struct
{
// Reference count for the iommu_handle
nv_kref_t kref;
struct iommu_sva *iommu_handle;
} uvm_sva_gpu_va_space_t;
#if UVM_ATS_SVA_SUPPORTED()
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu);
// LOCKING: mmap_lock must be lockable
// VA space lock must not be held.
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// LOCKING: VA space lock must not be held.
void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// LOCKING: None
NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// LOCKING: None
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size);
#else
static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
}
#endif
#else
static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
return NV_OK;
}
static void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
}
static NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
}
static NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
}
static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
}
#endif // UVM_ATS_SVA_SUPPORTED
#endif // __UVM_ATS_SVA_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -24,12 +24,14 @@
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_kvmalloc.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_tracker.h"
#include "uvm_va_space.h"
#include "uvm_rm_mem.h"
#include "uvm_mem.h"
#include "uvm_gpu.h"
#define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
#define CE_TEST_MEM_END_SIZE 32
@@ -52,6 +54,11 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
uvm_push_t push;
bool is_proxy;
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
@@ -66,7 +73,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
TEST_CHECK_GOTO(status == NV_OK, done);
is_proxy = uvm_channel_is_proxy(push.channel);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy).address;
// All of the following CE transfers are done from a single (L)CE and
// disabling pipelining is enough to order them when needed. Only push_end
@@ -74,7 +81,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
// Initialize to a bad value
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
@@ -83,7 +90,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
// Set the first buffer to 1
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy);
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy).address;
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
@@ -91,9 +98,9 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
if (dst == CE_TEST_MEM_COUNT)
dst_va = host_mem_gpu_va;
else
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy);
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy).address;
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;
// The first memcpy needs to be non-pipelined as otherwise the previous
// memset/memcpy to the source may not be done yet.
@@ -167,6 +174,11 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
uvm_push_t push;
NvU32 value;
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
@@ -175,11 +187,11 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
TEST_CHECK_GOTO(status == NV_OK, done);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel));
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel)).address;
for (i = 0; i < REDUCTIONS; ++i) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS);
}
// Without a sys membar the channel tracking semaphore can and does complete
@@ -333,6 +345,16 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
return NV_ERR_INVALID_STATE;
}
// If physical accesses aren't supported, silently convert to virtual to
// test the flat mapping.
TEST_CHECK_RET(gpu_verif_addr.is_virtual);
if (!src.is_virtual)
src = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(src.aperture, src.address));
if (!dst.is_virtual)
dst = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(dst.aperture, dst.address));
// Memset src with the appropriate element size, then memcpy to dst and from
// dst to the verif location (physical sysmem).
@@ -374,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
bool is_proxy_va_space;
bool is_proxy_va_space = false;
uvm_gpu_address_t gpu_verif_addr;
void *cpu_verif_addr;
uvm_mem_t *verif_mem = NULL;
@@ -382,17 +404,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
uvm_mem_t *gpu_uvm_mem = NULL;
uvm_rm_mem_t *sys_rm_mem = NULL;
uvm_rm_mem_t *gpu_rm_mem = NULL;
uvm_gpu_address_t gpu_addresses[4];
NvU64 gpu_va;
size_t size;
uvm_gpu_address_t gpu_addresses[4] = {0};
size_t size = gpu->big_page.internal_size;
static const size_t element_sizes[] = {1, 4, 8};
const size_t iterations = 4;
size_t i, j, k, s;
uvm_mem_alloc_params_t mem_params = {0};
size = gpu->big_page.internal_size;
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
if (uvm_conf_computing_mode_enabled(gpu))
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
else
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
@@ -410,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
}
}
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
if (uvm_conf_computing_mode_enabled(gpu)) {
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[0],
gpu_addresses[0],
size,
element_sizes[s],
gpu_verif_addr,
cpu_verif_addr,
i),
done);
}
}
// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the previous cases are valid.
// TODO: Bug 3839176: the test partially waived on Confidential
// Computing because it assumes that GPU can access system memory
// without using encryption.
goto done;
}
// Using a page size equal to the allocation size ensures that the UVM
// memories about to be allocated are physically contiguous. And since the
// size is a valid GPU page size, the memories can be virtually mapped on
@@ -421,23 +471,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
is_proxy_va_space = false;
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);
for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
@@ -513,6 +557,11 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
// Semaphore reduction needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
@@ -528,7 +577,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
for (i = 0; i < REDUCTIONS; i++) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, REDUCTIONS);
}
status = uvm_push_end_and_wait(&push);
@@ -560,6 +609,11 @@ static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
// Semaphore release needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
@@ -609,6 +663,11 @@ static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
// The semaphore is 4 words long (16 bytes).
const size_t size = 16;
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
@@ -645,6 +704,517 @@ done:
return status;
}
static bool mem_match(uvm_mem_t *mem1, uvm_mem_t *mem2, size_t size)
{
void *mem1_addr;
void *mem2_addr;
UVM_ASSERT(uvm_mem_is_sysmem(mem1));
UVM_ASSERT(uvm_mem_is_sysmem(mem2));
UVM_ASSERT(mem1->size >= size);
UVM_ASSERT(mem2->size >= size);
mem1_addr = uvm_mem_get_cpu_addr_kernel(mem1);
mem2_addr = uvm_mem_get_cpu_addr_kernel(mem2);
return !memcmp(mem1_addr, mem2_addr, size);
}
static NV_STATUS zero_vidmem(uvm_mem_t *mem)
{
uvm_push_t push;
uvm_gpu_address_t gpu_address;
uvm_gpu_t *gpu = mem->backing_gpu;
UVM_ASSERT(uvm_mem_is_vidmem(mem));
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
gpu->parent->ce_hal->memset_1(&push, gpu_address, 0, mem->size);
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}
static void write_range_cpu(uvm_mem_t *mem, NvU64 base_val)
{
NvU64 *mem_cpu_va;
unsigned i;
UVM_ASSERT(uvm_mem_is_sysmem(mem));
UVM_ASSERT(IS_ALIGNED(mem->size, sizeof(*mem_cpu_va)));
mem_cpu_va = (NvU64 *) uvm_mem_get_cpu_addr_kernel(mem);
for (i = 0; i < (mem->size / sizeof(*mem_cpu_va)); i++)
mem_cpu_va[i] = base_val++;
}
static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
{
NV_STATUS status;
UVM_ASSERT(mem);
*mem = NULL;
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);
return NV_OK;
err:
uvm_mem_free(*mem);
return status;
}
static NV_STATUS alloc_sysmem_unprotected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
{
NV_STATUS status;
UVM_ASSERT(mem);
*mem = NULL;
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
memset(uvm_mem_get_cpu_addr_kernel(*mem), 0, (*mem)->size);
return NV_OK;
err:
uvm_mem_free(*mem);
return status;
}
static void cpu_encrypt(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
{
size_t offset = 0;
char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
while (offset < size) {
uvm_conf_computing_cpu_encrypt(channel, dst_cipher, src_plain, NULL, copy_size, auth_tag_buffer);
offset += copy_size;
dst_cipher += copy_size;
src_plain += copy_size;
auth_tag_buffer += UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
}
}
static void cpu_acquire_encryption_ivs(uvm_channel_t *channel,
size_t size,
NvU32 copy_size,
UvmCslIv *ivs)
{
size_t offset = 0;
int i = 0;
for (; offset < size; offset += copy_size)
uvm_conf_computing_acquire_encryption_iv(channel, &ivs[i++]);
}
static void cpu_encrypt_rev(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size,
UvmCslIv *encrypt_iv)
{
char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
int i;
// CPU encrypt order is the opposite of the GPU decrypt order
for (i = (size / copy_size) - 1; i >= 0; i--) {
uvm_conf_computing_cpu_encrypt(channel,
dst_cipher + i * copy_size,
src_plain + i * copy_size,
encrypt_iv + i,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}
}
static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
{
size_t i;
char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
for (i = 0; i < size / copy_size; i++) {
TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
return NV_OK;
}
static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
{
int i;
char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
UVM_ASSERT((size / copy_size) <= INT_MAX);
// CPU decrypt order is the opposite of the GPU decrypt order
for (i = (size / copy_size) - 1; i >= 0; i--) {
TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
return NV_OK;
}
// GPU address to use as source or destination in CE decrypt/encrypt operations.
// If the uvm_mem backing storage is contiguous in the [offset, offset + size)
// interval, the physical address gets priority over the virtual counterpart.
static uvm_gpu_address_t gpu_address(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU32 size)
{
uvm_gpu_address_t gpu_virtual_address;
if (uvm_mem_is_physically_contiguous(mem, offset, size))
return uvm_mem_gpu_address_physical(mem, gpu, offset, size);
gpu_virtual_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
gpu_virtual_address.address += offset;
return gpu_virtual_address;
}
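// For example, a physically contiguous allocation yields a physical address
// for any [offset, offset + size) window within it, while a scattered sysmem
// allocation falls back to the kernel virtual mapping, offset into the
// allocation.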
// Automatically get the correct address for the authentication tag. The
// addressing mode of the tag should match that of the reference address
// (destination pointer for GPU encrypt, source pointer for GPU decrypt).
static uvm_gpu_address_t auth_tag_gpu_address(uvm_mem_t *auth_tag_mem,
uvm_gpu_t *gpu,
size_t offset,
uvm_gpu_address_t reference)
{
uvm_gpu_address_t auth_tag_gpu_address;
if (!reference.is_virtual)
return uvm_mem_gpu_address_physical(auth_tag_mem, gpu, offset, UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);
auth_tag_gpu_address.address += offset;
return auth_tag_gpu_address;
}
// Note: no membar is issued in any of the GPU transfers (encryptions)
static void gpu_encrypt(uvm_push_t *push,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
uvm_mem_t *auth_tag_mem,
UvmCslIv *decrypt_iv,
size_t size,
NvU32 copy_size)
{
size_t i;
size_t num_iterations = size / copy_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
for (i = 0; i < num_iterations; i++) {
uvm_gpu_address_t dst_cipher = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
uvm_gpu_address_t src_plain = gpu_address(src_mem, gpu, i * copy_size, copy_size);
uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
gpu,
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
dst_cipher);
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->encrypt(push, dst_cipher, src_plain, copy_size, auth_tag);
decrypt_iv++;
}
}
// Note: no membar is issued in any of the GPU transfers (decryptions)
static void gpu_decrypt(uvm_push_t *push,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
{
size_t i;
size_t num_iterations = size / copy_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
for (i = 0; i < num_iterations; i++) {
uvm_gpu_address_t dst_plain = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
uvm_gpu_address_t src_cipher = gpu_address(src_mem, gpu, i * copy_size, copy_size);
uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
gpu,
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
src_cipher);
if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->decrypt(push, dst_plain, src_cipher, copy_size, auth_tag);
}
}
static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
uvm_channel_type_t decrypt_channel_type,
uvm_channel_type_t encrypt_channel_type,
size_t size,
NvU32 copy_size,
bool decrypt_in_order,
bool encrypt_in_order)
{
uvm_push_t push;
NvU64 init_value;
NV_STATUS status = NV_OK;
uvm_mem_t *src_plain = NULL;
uvm_mem_t *src_cipher = NULL;
uvm_mem_t *dst_cipher = NULL;
uvm_mem_t *dst_plain_gpu = NULL;
uvm_mem_t *dst_plain = NULL;
uvm_mem_t *auth_tag_mem = NULL;
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
UvmCslIv *decrypt_iv = NULL;
UvmCslIv *encrypt_iv = NULL;
uvm_tracker_t tracker;
size_t src_plain_size;
TEST_CHECK_RET(copy_size <= size);
TEST_CHECK_RET(IS_ALIGNED(size, copy_size));
uvm_tracker_init(&tracker);
decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!decrypt_iv) {
status = NV_ERR_NO_MEMORY;
goto out;
}
encrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!encrypt_iv) {
status = NV_ERR_NO_MEMORY;
goto out;
}
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_cipher, size), out);
TEST_NV_CHECK_GOTO(alloc_vidmem_protected(gpu, &dst_plain_gpu, size), out);
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_cipher, size), out);
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_plain, size), out);
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &auth_tag_mem, auth_tag_buffer_size), out);
// The plaintext CPU buffer size should fit the initialization value
src_plain_size = UVM_ALIGN_UP(size, sizeof(init_value));
TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_plain, src_plain_size), out);
    // Initialize the plaintext CPU buffer using a value derived from the
    // given inputs
TEST_CHECK_GOTO((((NvU64) size) < (1ULL << 63)), out);
init_value = ((NvU64) decrypt_in_order << 63) | ((NvU64) size) | ((NvU64) copy_size);
write_range_cpu(src_plain, init_value);
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
decrypt_channel_type,
&push,
"CPU > GPU decrypt"),
out);
    // CPU (decrypted) > CPU (encrypted), using the CPU, if encrypting
    // in-order; otherwise, just acquire the IVs for the later out-of-order
    // encryption
if (encrypt_in_order)
cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
else
cpu_acquire_encryption_ivs(push.channel, size, copy_size, encrypt_iv);
// CPU (encrypted) > GPU (decrypted), using GPU
gpu_decrypt(&push, dst_plain_gpu, src_cipher, auth_tag_mem, size, copy_size);
// Use acquired IVs to encrypt in reverse order
if (!encrypt_in_order)
cpu_encrypt_rev(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size, encrypt_iv);
uvm_push_end(&push);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);
// GPU (decrypted) > CPU (encrypted), using GPU
TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
encrypt_channel_type,
&tracker,
&push,
"GPU > CPU encrypt"),
out);
gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
TEST_CHECK_GOTO(!mem_match(dst_cipher, src_plain, size), out);
// CPU (encrypted) > CPU (decrypted), using CPU
if (decrypt_in_order) {
TEST_NV_CHECK_GOTO(cpu_decrypt_in_order(push.channel,
dst_plain,
dst_cipher,
decrypt_iv,
auth_tag_mem,
size,
copy_size),
out);
}
else {
TEST_NV_CHECK_GOTO(cpu_decrypt_out_of_order(push.channel,
dst_plain,
dst_cipher,
decrypt_iv,
auth_tag_mem,
size,
copy_size),
out);
}
TEST_CHECK_GOTO(mem_match(src_plain, dst_plain, size), out);
out:
uvm_mem_free(auth_tag_mem);
uvm_mem_free(dst_plain);
uvm_mem_free(dst_plain_gpu);
uvm_mem_free(dst_cipher);
uvm_mem_free(src_cipher);
uvm_mem_free(src_plain);
uvm_tracker_deinit(&tracker);
uvm_kvfree(decrypt_iv);
uvm_kvfree(encrypt_iv);
return status;
}
static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
uvm_channel_type_t decrypt_channel_type,
uvm_channel_type_t encrypt_channel_type)
{
bool cpu_decrypt_in_order = true;
bool cpu_encrypt_in_order = true;
size_t size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_4K * 2, UVM_PAGE_SIZE_2M};
size_t copy_size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_2M};
unsigned i;
struct {
bool encrypt_in_order;
bool decrypt_in_order;
} orders[] = {{true, true}, {true, false}, {false, true}, {false, false}};
struct {
size_t size;
NvU32 copy_size;
} small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};
// Only Confidential Computing uses CE encryption/decryption
if (!uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
    // Use sizes, and copy sizes, that are not multiples of common page sizes.
for (i = 0; i < ARRAY_SIZE(small_sizes); ++i) {
// Skip tests that need large pushbuffer on WLC. Secure work launch
// needs to do at least one decrypt operation so tests that only need
// one operation work ok. Tests using more operations might overflow
// UVM_MAX_WLC_PUSH_SIZE.
if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (small_sizes[i].size / small_sizes[i].copy_size > 1))
continue;
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
decrypt_channel_type,
encrypt_channel_type,
small_sizes[i].size,
small_sizes[i].copy_size,
cpu_decrypt_in_order,
cpu_encrypt_in_order));
}
// Use sizes, and copy sizes, that are a multiple of common page sizes.
// This is the most typical usage of encrypt/decrypt in the UVM driver.
for (i = 0; i < ARRAY_SIZE(orders); ++i) {
unsigned j;
cpu_encrypt_in_order = orders[i].encrypt_in_order;
cpu_decrypt_in_order = orders[i].decrypt_in_order;
for (j = 0; j < ARRAY_SIZE(size); ++j) {
unsigned k;
for (k = 0; k < ARRAY_SIZE(copy_size); ++k) {
if (copy_size[k] > size[j])
continue;
// Skip tests that need large pushbuffer on WLC. Secure work
// launch needs to do at least one decrypt operation so tests
// that only need one operation work ok. Tests using more
// operations might overflow UVM_MAX_WLC_PUSH_SIZE.
if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (size[j] / copy_size[k] > 1))
continue;
// There is no difference between in-order and out-of-order
// decryption when encrypting once.
if ((copy_size[k] == size[j]) && !cpu_decrypt_in_order)
continue;
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
decrypt_channel_type,
encrypt_channel_type,
size[j],
copy_size[k],
cpu_decrypt_in_order,
cpu_encrypt_in_order));
}
}
}
return NV_OK;
}
static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
{
uvm_gpu_t *gpu;
@@ -655,9 +1225,13 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
if (!skipTimestampTest)
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
}
TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_CPU_TO_GPU, UVM_CHANNEL_TYPE_GPU_TO_CPU));
TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_WLC, UVM_CHANNEL_TYPE_WLC));
}
return NV_OK;
}

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff