mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-27 11:39:46 +00:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed4be64962 | ||
|
|
315fd96d2d | ||
|
|
448d5cc656 | ||
|
|
5fdf5032fb | ||
|
|
171c735e57 | ||
|
|
74ee05e160 | ||
|
|
3084c04453 | ||
|
|
caa2dd11a0 | ||
|
|
e45d91de02 | ||
|
|
083cd9cf17 | ||
|
|
ea4c27fad6 | ||
|
|
3bf16b890c | ||
|
|
12933b2d3c |
210
CHANGELOG.md
210
CHANGELOG.md
@@ -1,210 +0,0 @@
|
||||
# Changelog
|
||||
|
||||
## Release 550 Entries
|
||||
|
||||
### [550.54.14] 2024-02-23
|
||||
|
||||
#### Added
|
||||
|
||||
- Added vGPU Host and vGPU Guest support. For vGPU Host, please refer to the README.vgpu packaged in the vGPU Host Package for more details.
|
||||
|
||||
### [550.40.07] 2024-01-24
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Set INSTALL_MOD_DIR only if it's not defined, [#570](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/570) by @keelung-yang
|
||||
|
||||
## Release 545 Entries
|
||||
|
||||
### [545.29.06] 2023-11-22
|
||||
|
||||
#### Fixed
|
||||
|
||||
- The brightness control of NVIDIA seems to be broken, [#573](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/573)
|
||||
|
||||
### [545.29.02] 2023-10-31
|
||||
|
||||
### [545.23.06] 2023-10-17
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix always-false conditional, [#493](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/493) by @meme8383
|
||||
|
||||
#### Added
|
||||
|
||||
- Added beta-quality support for GeForce and Workstation GPUs. Please see the "Open Linux Kernel Modules" chapter in the NVIDIA GPU driver end user README for details.
|
||||
|
||||
## Release 535 Entries
|
||||
|
||||
### [535.129.03] 2023-10-31
|
||||
|
||||
### [535.113.01] 2023-09-21
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed build failure of main against current CentOS Stream 8, [#550](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/550) by @airlied
|
||||
|
||||
### [535.104.05] 2023-08-22
|
||||
|
||||
### [535.98] 2023-08-08
|
||||
|
||||
### [535.86.10] 2023-07-31
|
||||
|
||||
### [535.86.05] 2023-07-18
|
||||
|
||||
### [535.54.03] 2023-06-14
|
||||
|
||||
### [535.43.02] 2023-05-30
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed console restore with traditional VGA consoles.
|
||||
|
||||
#### Added
|
||||
|
||||
- Added support for Run Time D3 (RTD3) on Ampere and later GPUs.
|
||||
- Added support for G-Sync on desktop GPUs.
|
||||
|
||||
## Release 530 Entries
|
||||
|
||||
### [530.41.03] 2023-03-23
|
||||
|
||||
### [530.30.02] 2023-02-28
|
||||
|
||||
#### Changed
|
||||
|
||||
- GSP firmware is now distributed as `gsp_tu10x.bin` and `gsp_ga10x.bin` to better reflect the GPU architectures supported by each firmware file in this release.
|
||||
- The .run installer will continue to install firmware to /lib/firmware/nvidia/<version> and the nvidia.ko kernel module will load the appropriate firmware for each GPU at runtime.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for power management features like Suspend, Hibernate and Resume.
|
||||
|
||||
## Release 525 Entries
|
||||
|
||||
### [525.147.05] 2023-10-31
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
|
||||
|
||||
### [525.125.06] 2023-06-26
|
||||
|
||||
### [525.116.04] 2023-05-09
|
||||
|
||||
### [525.116.03] 2023-04-25
|
||||
|
||||
### [525.105.17] 2023-03-30
|
||||
|
||||
### [525.89.02] 2023-02-08
|
||||
|
||||
### [525.85.12] 2023-01-30
|
||||
|
||||
### [525.85.05] 2023-01-19
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix build problems with Clang 15.0, [#377](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/377) by @ptr1337
|
||||
|
||||
### [525.78.01] 2023-01-05
|
||||
|
||||
### [525.60.13] 2022-12-05
|
||||
|
||||
### [525.60.11] 2022-11-28
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)
|
||||
|
||||
### [525.53] 2022-11-10
|
||||
|
||||
#### Changed
|
||||
|
||||
- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
|
||||
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
|
||||
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
|
||||
- Return EINVAL when [failing to] allocating memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
|
||||
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
|
||||
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.
|
||||
|
||||
## Release 520 Entries
|
||||
|
||||
### [520.61.07] 2022-10-20
|
||||
|
||||
### [520.56.06] 2022-10-12
|
||||
|
||||
#### Added
|
||||
|
||||
- Introduce support for GeForce RTX 4090 GPUs.
|
||||
|
||||
### [520.61.05] 2022-10-10
|
||||
|
||||
#### Added
|
||||
|
||||
- Introduce support for NVIDIA H100 GPUs.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix/Improve Makefile, [#308](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/308/) by @izenynn
|
||||
- Make nvLogBase2 more efficient, [#177](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/177/) by @DMaroo
|
||||
- nv-pci: fixed always true expression, [#195](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/195/) by @ValZapod
|
||||
|
||||
## Release 515 Entries
|
||||
|
||||
### [515.76] 2022-09-20
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Improved compatibility with new Linux kernel releases
|
||||
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
|
||||
|
||||
### [515.65.07] 2022-10-19
|
||||
|
||||
### [515.65.01] 2022-08-02
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Collection of minor fixes to issues, [#61](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/61) by @Joshua-Ashton
|
||||
- Remove unnecessary use of acpi_bus_get_device().
|
||||
|
||||
### [515.57] 2022-06-28
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Backtick is deprecated, [#273](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/273) by @arch-user-france1
|
||||
|
||||
### [515.48.07] 2022-05-31
|
||||
|
||||
#### Added
|
||||
|
||||
- List of compatible GPUs in README.md.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix various README capitalizations, [#8](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/8) by @27lx
|
||||
- Automatically tag bug report issues, [#15](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/15) by @thebeanogamer
|
||||
- Improve conftest.sh Script, [#37](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/37) by @Nitepone
|
||||
- Update HTTP link to HTTPS, [#101](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/101) by @alcaparra
|
||||
- moved array sanity check to before the array access, [#117](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/117) by @RealAstolfo
|
||||
- Fixed some typos, [#122](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/122) by @FEDOyt
|
||||
- Fixed capitalization, [#123](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/123) by @keroeslux
|
||||
- Fix typos in NVDEC Engine Descriptor, [#126](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/126) by @TrickyDmitriy
|
||||
- Extranous apostrohpes in a makefile script [sic], [#14](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/14) by @kiroma
|
||||
- HDMI no audio @ 4K above 60Hz, [#75](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/75) by @adolfotregosa
|
||||
- dp_configcaps.cpp:405: array index sanity check in wrong place?, [#110](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/110) by @dcb314
|
||||
- NVRM kgspInitRm_IMPL: missing NVDEC0 engine, cannot initialize GSP-RM, [#116](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/116) by @kfazz
|
||||
- ERROR: modpost: "backlight_device_register" [...nvidia-modeset.ko] undefined, [#135](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/135) by @sndirsch
|
||||
- aarch64 build fails, [#151](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/151) by @frezbo
|
||||
|
||||
### [515.43.04] 2022-05-11
|
||||
|
||||
- Initial release.
|
||||
|
||||
40
README.md
40
README.md
@@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 550.54.14.
|
||||
version 560.35.03.
|
||||
|
||||
|
||||
## How to Build
|
||||
@@ -17,7 +17,7 @@ as root:
|
||||
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
550.54.14 driver release. This can be achieved by installing
|
||||
560.35.03 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@@ -74,7 +74,7 @@ kernel.
|
||||
|
||||
The NVIDIA open kernel modules support the same range of Linux kernel
|
||||
versions that are supported with the proprietary NVIDIA kernel modules.
|
||||
This is currently Linux kernel 3.10 or newer.
|
||||
This is currently Linux kernel 4.15 or newer.
|
||||
|
||||
|
||||
## How to Contribute
|
||||
@@ -179,16 +179,13 @@ software applications.
|
||||
|
||||
## Compatible GPUs
|
||||
|
||||
The NVIDIA open kernel modules can be used on any Turing or later GPU
|
||||
(see the table below). However, in the 550.54.14 release, GeForce and
|
||||
Workstation support is considered to be Beta quality. The open kernel modules
|
||||
are suitable for broad usage, and NVIDIA requests feedback on any issues
|
||||
encountered specific to them.
|
||||
The NVIDIA open kernel modules can be used on any Turing or later GPU (see the
|
||||
table below).
|
||||
|
||||
For details on feature support and limitations, see the NVIDIA GPU driver
|
||||
end user README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.54.14/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/560.35.03/README/kernel_open.html
|
||||
|
||||
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
|
||||
Package for more details.
|
||||
@@ -651,6 +648,7 @@ Subsystem Device ID.
|
||||
| NVIDIA T1000 8GB | 1FF0 17AA 1612 |
|
||||
| NVIDIA T400 4GB | 1FF2 1028 1613 |
|
||||
| NVIDIA T400 4GB | 1FF2 103C 1613 |
|
||||
| NVIDIA T400E | 1FF2 103C 18FF |
|
||||
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
|
||||
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
|
||||
| NVIDIA T400E | 1FF2 10DE 18FF |
|
||||
@@ -756,6 +754,8 @@ Subsystem Device ID.
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H200 | 2335 10DE 18BE |
|
||||
| NVIDIA H200 | 2335 10DE 18BF |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 16EB |
|
||||
@@ -829,6 +829,16 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA RTX A1000 | 25B0 1028 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 8D96 |
|
||||
| NVIDIA RTX A1000 | 25B0 10DE 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 17AA 1878 |
|
||||
| NVIDIA RTX A400 | 25B2 1028 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 8D95 |
|
||||
| NVIDIA RTX A400 | 25B2 10DE 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 17AA 1879 |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
| NVIDIA A2 | 25B6 10DE 157E |
|
||||
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
|
||||
@@ -847,6 +857,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA GeForce RTX 4090 D | 2685 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
@@ -864,9 +875,11 @@ Subsystem Device ID.
|
||||
| NVIDIA L40S | 26B9 10DE 1851 |
|
||||
| NVIDIA L40S | 26B9 10DE 18CF |
|
||||
| NVIDIA L20 | 26BA 10DE 1957 |
|
||||
| NVIDIA L20 | 26BA 10DE 1990 |
|
||||
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
|
||||
| NVIDIA GeForce RTX 4070 | 2709 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
@@ -874,6 +887,7 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2788 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
@@ -896,7 +910,9 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2803 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2805 |
|
||||
| NVIDIA GeForce RTX 4060 | 2808 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 2822 |
|
||||
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 | 2882 |
|
||||
@@ -904,9 +920,15 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 10DE 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 17AA 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
|
||||
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
|
||||
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BA |
|
||||
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
|
||||
|
||||
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.14\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"560.35.03\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
@@ -118,7 +118,7 @@ ifeq ($(ARCH),x86_64)
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH),powerpc)
|
||||
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align -mno-altivec
|
||||
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
|
||||
endif
|
||||
|
||||
EXTRA_CFLAGS += -DNV_UVM_ENABLE
|
||||
@@ -170,6 +170,9 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
|
||||
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
|
||||
|
||||
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
|
||||
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
|
||||
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
|
||||
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
|
||||
NV_CONFTEST_CFLAGS += -Wno-error
|
||||
|
||||
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
|
||||
|
||||
@@ -28,7 +28,7 @@ else
|
||||
else
|
||||
KERNEL_UNAME ?= $(shell uname -r)
|
||||
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
|
||||
KERNEL_SOURCES := $(shell test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source || echo $(KERNEL_MODLIB)/build)
|
||||
KERNEL_SOURCES := $(shell ((test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source) || (test -d $(KERNEL_MODLIB)/build/source && echo $(KERNEL_MODLIB)/build/source)) || echo $(KERNEL_MODLIB)/build)
|
||||
endif
|
||||
|
||||
KERNEL_OUTPUT := $(KERNEL_SOURCES)
|
||||
@@ -42,7 +42,11 @@ else
|
||||
else
|
||||
KERNEL_UNAME ?= $(shell uname -r)
|
||||
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
|
||||
ifeq ($(KERNEL_SOURCES), $(KERNEL_MODLIB)/source)
|
||||
# $(filter pattern...,text) - Returns all whitespace-separated words in text that
|
||||
# do match any of the pattern words, removing any words that do not match.
|
||||
# Set the KERNEL_OUTPUT only if either $(KERNEL_MODLIB)/source or
|
||||
# $(KERNEL_MODLIB)/build/source path matches the KERNEL_SOURCES.
|
||||
ifneq ($(filter $(KERNEL_SOURCES),$(KERNEL_MODLIB)/source $(KERNEL_MODLIB)/build/source),)
|
||||
KERNEL_OUTPUT := $(KERNEL_MODLIB)/build
|
||||
KBUILD_PARAMS := KBUILD_OUTPUT=$(KERNEL_OUTPUT)
|
||||
endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -44,6 +44,7 @@ typedef enum
|
||||
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
|
||||
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
|
||||
NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
|
||||
NV_FIRMWARE_CHIP_FAMILY_END,
|
||||
} nv_firmware_chip_family_t;
|
||||
|
||||
@@ -52,6 +53,7 @@ static inline const char *nv_firmware_chip_family_to_string(
|
||||
)
|
||||
{
|
||||
switch (fw_chip_family) {
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
|
||||
@@ -66,13 +68,13 @@ static inline const char *nv_firmware_chip_family_to_string(
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
|
||||
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
|
||||
// The includer may optionally define
|
||||
// NV_FIRMWARE_FOR_NAME(name)
|
||||
// to return a platform-defined string for a given gsp_* or gsp_log_* name.
|
||||
//
|
||||
// The function nv_firmware_path will then be available.
|
||||
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
static inline const char *nv_firmware_path(
|
||||
// The function nv_firmware_for_chip_family will then be available.
|
||||
#if defined(NV_FIRMWARE_FOR_NAME)
|
||||
static inline const char *nv_firmware_for_chip_family(
|
||||
nv_firmware_type_t fw_type,
|
||||
nv_firmware_chip_family_t fw_chip_family
|
||||
)
|
||||
@@ -81,15 +83,16 @@ static inline const char *nv_firmware_path(
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");
|
||||
return NV_FIRMWARE_FOR_NAME("gsp_ga10x");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
|
||||
return NV_FIRMWARE_FOR_NAME("gsp_tu10x");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
@@ -100,15 +103,16 @@ static inline const char *nv_firmware_path(
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");
|
||||
return NV_FIRMWARE_FOR_NAME("gsp_log_ga10x");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
|
||||
return NV_FIRMWARE_FOR_NAME("gsp_log_tu10x");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_NULL:
|
||||
@@ -118,15 +122,15 @@ static inline const char *nv_firmware_path(
|
||||
|
||||
return "";
|
||||
}
|
||||
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)
|
||||
#endif // defined(NV_FIRMWARE_FOR_NAME)
|
||||
|
||||
// The includer (presumably nv.c) may optionally define
|
||||
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
|
||||
// The includer may optionally define
|
||||
// NV_FIRMWARE_DECLARE_GSP(name)
|
||||
// which will then be invoked (at the top-level) for each
|
||||
// gsp_*.bin (but not gsp_log_*.bin)
|
||||
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
|
||||
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
|
||||
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
|
||||
// gsp_* (but not gsp_log_*)
|
||||
#if defined(NV_FIRMWARE_DECLARE_GSP)
|
||||
NV_FIRMWARE_DECLARE_GSP("gsp_ga10x")
|
||||
NV_FIRMWARE_DECLARE_GSP("gsp_tu10x")
|
||||
#endif // defined(NV_FIRMWARE_DECLARE_GSP)
|
||||
|
||||
#endif // NV_FIRMWARE_DECLARE_GSP_FILENAME
|
||||
#endif // NV_FIRMWARE_DECLARE_GSP
|
||||
|
||||
@@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
|
||||
OS_HYPERVISOR_UNKNOWN
|
||||
} HYPERVISOR_TYPE;
|
||||
|
||||
#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
|
||||
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
|
||||
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
|
||||
#define CMD_VGPU_VFIO_PRESENT 3
|
||||
#define CMD_VFIO_PCI_CORE_PRESENT 4
|
||||
#define CMD_VFIO_WAKE_REMOVE_GPU 1
|
||||
#define CMD_VGPU_VFIO_PRESENT 2
|
||||
#define CMD_VFIO_PCI_CORE_PRESENT 3
|
||||
|
||||
#define MAX_VF_COUNT_PER_GPU 64
|
||||
#define MAX_VF_COUNT_PER_GPU 64
|
||||
|
||||
typedef enum _VGPU_TYPE_INFO
|
||||
{
|
||||
@@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void *vgpuVfioRef;
|
||||
void *waitQueue;
|
||||
void *nv;
|
||||
NvU32 *vgpuTypeIds;
|
||||
NvU8 **vgpuNames;
|
||||
NvU32 numVgpuTypes;
|
||||
NvU32 domain;
|
||||
NvU8 bus;
|
||||
NvU8 slot;
|
||||
NvU8 function;
|
||||
NvBool is_virtfn;
|
||||
NvU32 domain;
|
||||
NvU32 bus;
|
||||
NvU32 device;
|
||||
NvU32 return_status;
|
||||
} vgpu_vfio_info;
|
||||
|
||||
typedef struct
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -58,14 +58,10 @@
|
||||
#include <linux/version.h>
|
||||
#include <linux/utsname.h>
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
|
||||
#error "This driver does not support kernels older than 2.6.32!"
|
||||
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
|
||||
# define KERNEL_2_6
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
|
||||
# define KERNEL_3
|
||||
#else
|
||||
#error "This driver does not support development kernels!"
|
||||
#if LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0)
|
||||
// Version 4.4 is allowed, temporarily, although not officially supported.
|
||||
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
|
||||
#error "This driver does not support kernels older than Linux 4.15!"
|
||||
#endif
|
||||
|
||||
#if defined (CONFIG_SMP) && !defined (__SMP__)
|
||||
@@ -474,7 +470,9 @@ static inline void *nv_vmalloc(unsigned long size)
|
||||
void *ptr = __vmalloc(size, GFP_KERNEL);
|
||||
#endif
|
||||
if (ptr)
|
||||
{
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@@ -492,7 +490,9 @@ static inline void *nv_ioremap(NvU64 phys, NvU64 size)
|
||||
void *ptr = ioremap(phys, size);
|
||||
#endif
|
||||
if (ptr)
|
||||
{
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@@ -528,8 +528,9 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
{
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@@ -545,8 +546,9 @@ static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
{
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@@ -675,7 +677,9 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
|
||||
/* All memory cached in PPC64LE; can't honor 'cached' input. */
|
||||
ptr = vmap(pages, page_count, VM_MAP, prot);
|
||||
if (ptr)
|
||||
{
|
||||
NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
|
||||
}
|
||||
return (NvUPtr)ptr;
|
||||
}
|
||||
|
||||
@@ -836,16 +840,16 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
#define NV_PRINT_AT(nv_debug_level,at) \
|
||||
{ \
|
||||
nv_printf(nv_debug_level, \
|
||||
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
|
||||
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
|
||||
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
|
||||
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
|
||||
at->flags, at->page_table); \
|
||||
at->page_table); \
|
||||
}
|
||||
|
||||
#define NV_PRINT_VMA(nv_debug_level,vma) \
|
||||
{ \
|
||||
nv_printf(nv_debug_level, \
|
||||
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n", \
|
||||
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08lx bytes @ 0x%016llx, 0x%p, 0x%p\n", \
|
||||
__FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma), \
|
||||
NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma)); \
|
||||
}
|
||||
@@ -1078,6 +1082,8 @@ static inline void nv_kmem_ctor_dummy(void *arg)
|
||||
kmem_cache_destroy(kmem_cache); \
|
||||
}
|
||||
|
||||
#define NV_KMEM_CACHE_ALLOC_ATOMIC(kmem_cache) \
|
||||
kmem_cache_alloc(kmem_cache, GFP_ATOMIC)
|
||||
#define NV_KMEM_CACHE_ALLOC(kmem_cache) \
|
||||
kmem_cache_alloc(kmem_cache, GFP_KERNEL)
|
||||
#define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \
|
||||
@@ -1104,6 +1110,23 @@ static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int nv_kmem_cache_alloc_stack_atomic(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
sp = NV_KMEM_CACHE_ALLOC_ATOMIC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
}
|
||||
#endif
|
||||
*stack = sp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
@@ -1159,6 +1182,16 @@ typedef struct nvidia_pte_s {
|
||||
unsigned int page_count;
|
||||
} nvidia_pte_t;
|
||||
|
||||
#if defined(CONFIG_DMA_SHARED_BUFFER)
|
||||
/* Standard dma_buf-related information. */
|
||||
struct nv_dma_buf
|
||||
{
|
||||
struct dma_buf *dma_buf;
|
||||
struct dma_buf_attachment *dma_attach;
|
||||
struct sg_table *sgt;
|
||||
};
|
||||
#endif // CONFIG_DMA_SHARED_BUFFER
|
||||
|
||||
typedef struct nv_alloc_s {
|
||||
struct nv_alloc_s *next;
|
||||
struct device *dev;
|
||||
@@ -1614,6 +1647,10 @@ typedef struct nv_linux_state_s {
|
||||
nv_kthread_q_t open_q;
|
||||
NvBool is_accepting_opens;
|
||||
struct semaphore open_q_lock;
|
||||
#if defined(NV_VGPU_KVM_BUILD)
|
||||
wait_queue_head_t wait;
|
||||
NvS32 return_status;
|
||||
#endif
|
||||
} nv_linux_state_t;
|
||||
|
||||
extern nv_linux_state_t *nv_linux_devices;
|
||||
@@ -1989,31 +2026,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
|
||||
return nvl->numa_info.use_auto_online;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
NvU64 base;
|
||||
NvU64 size;
|
||||
NvU32 nodeId;
|
||||
int ret;
|
||||
} remove_numa_memory_info_t;
|
||||
|
||||
static void offline_numa_memory_callback
|
||||
(
|
||||
void *args
|
||||
)
|
||||
{
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
|
||||
pNumaInfo->base,
|
||||
pNumaInfo->size);
|
||||
#else
|
||||
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
|
||||
pNumaInfo->size);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_NUMA_STATUS_DISABLED = 0,
|
||||
|
||||
@@ -29,17 +29,17 @@
|
||||
typedef int vm_fault_t;
|
||||
#endif
|
||||
|
||||
/* pin_user_pages
|
||||
/*
|
||||
* pin_user_pages()
|
||||
*
|
||||
* Presence of pin_user_pages() also implies the presence of unpin_user_page().
|
||||
* Both were added in the v5.6-rc1
|
||||
* Both were added in v5.6.
|
||||
*
|
||||
* pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
|
||||
*
|
||||
* Removed vmas parameter from pin_user_pages() by commit 40896a02751
|
||||
* ("mm/gup: remove vmas parameter from pin_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
* pin_user_pages() was added by commit eddb1c228f79
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
|
||||
*
|
||||
* Removed vmas parameter from pin_user_pages() by commit 4c630f307455
|
||||
* ("mm/gup: remove vmas parameter from pin_user_pages()") in v6.5.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
@@ -63,25 +63,28 @@ typedef int vm_fault_t;
|
||||
#define NV_UNPIN_USER_PAGE put_page
|
||||
#endif // NV_PIN_USER_PAGES_PRESENT
|
||||
|
||||
/* get_user_pages
|
||||
/*
|
||||
* get_user_pages()
|
||||
*
|
||||
* The 8-argument version of get_user_pages was deprecated by commit
|
||||
* (2016 Feb 12: cde70140fed8429acf7a14e2e2cbd3e329036653)for the non-remote case
|
||||
* The 8-argument version of get_user_pages() was deprecated by commit
|
||||
* cde70140fed8 ("mm/gup: Overload get_user_pages() functions") in v4.6-rc1.
|
||||
* (calling get_user_pages with current and current->mm).
|
||||
*
|
||||
* Completely moved to the 6 argument version of get_user_pages -
|
||||
* 2016 Apr 4: c12d2da56d0e07d230968ee2305aaa86b93a6832
|
||||
* Completely moved to the 6 argument version of get_user_pages() by
|
||||
* commit c12d2da56d0e ("mm/gup: Remove the macro overload API migration
|
||||
* helpers from the get_user*() APIs") in v4.6-rc4.
|
||||
*
|
||||
* write and force parameters were replaced with gup_flags by -
|
||||
* 2016 Oct 12: 768ae309a96103ed02eb1e111e838c87854d8b51
|
||||
* write and force parameters were replaced with gup_flags by
|
||||
* commit 768ae309a961 ("mm: replace get_user_pages() write/force parameters
|
||||
* with gup_flags") in v4.9.
|
||||
*
|
||||
* A 7-argument version of get_user_pages was introduced into linux-4.4.y by
|
||||
* commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
|
||||
* replacement of the write and force parameters with gup_flags
|
||||
* commit 8e50b8b07f462 ("mm: replace get_user_pages() write/force parameters
|
||||
* with gup_flags") which cherry-picked the replacement of the write and
|
||||
* force parameters with gup_flags.
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
|
||||
* ("mm/gup: remove unused vmas parameter from get_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
* Removed vmas parameter from get_user_pages() by commit 54d020692b34
|
||||
* ("mm/gup: remove unused vmas parameter from get_user_pages()") in v6.5.
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -112,18 +115,19 @@ typedef int vm_fault_t;
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
|
||||
/* pin_user_pages_remote
|
||||
/*
|
||||
* pin_user_pages_remote()
|
||||
*
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f79
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
|
||||
*
|
||||
* pin_user_pages_remote() removed 'tsk' parameter by commit
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11). *
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9.
|
||||
*
|
||||
* Removed unused vmas parameter from pin_user_pages_remote() by commit
|
||||
* 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
* 0b295316b3a9 ("mm/gup: remove unused vmas parameter from
|
||||
* pin_user_pages_remote()") in v6.5.
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -143,7 +147,7 @@ typedef int vm_fault_t;
|
||||
|
||||
/*
|
||||
* get_user_pages_remote() was added by commit 1e9877902dc7
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6.
|
||||
*
|
||||
* Note that get_user_pages_remote() requires the caller to hold a reference on
|
||||
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
|
||||
@@ -153,19 +157,17 @@ typedef int vm_fault_t;
|
||||
*
|
||||
* get_user_pages_remote() write/force parameters were replaced
|
||||
* with gup_flags by commit 9beae1ea8930 ("mm: replace get_user_pages_remote()
|
||||
* write/force parameters with gup_flags") in v4.9 (2016-10-13).
|
||||
* write/force parameters with gup_flags") in v4.9.
|
||||
*
|
||||
* get_user_pages_remote() added 'locked' parameter by commit 5b56d49fc31d
|
||||
* ("mm: add locked parameter to get_user_pages_remote()") in
|
||||
* v4.10 (2016-12-14).
|
||||
* ("mm: add locked parameter to get_user_pages_remote()") in v4.10.
|
||||
*
|
||||
* get_user_pages_remote() removed 'tsk' parameter by
|
||||
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
|
||||
* all gup code") in v5.9-rc1 (2020-08-11).
|
||||
* all gup code") in v5.9.
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
|
||||
* ("mm/gup: remove vmas parameter from get_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
* Removed vmas parameter from get_user_pages_remote() by commit ca5e863233e8
|
||||
* ("mm/gup: remove vmas parameter from get_user_pages_remote()") in v6.5.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -110,15 +110,15 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_DSIPLL_CLKOUTPN,
|
||||
TEGRASOC_WHICH_CLK_DSIPLL_CLKOUTA,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_VCO,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTPN,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTA,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTB,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTPN,
|
||||
TEGRASOC_WHICH_CLK_SPPLL1_CLKOUTPN,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_DIV27,
|
||||
TEGRASOC_WHICH_CLK_SPPLL1_DIV27,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_DIV10,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_DIV25,
|
||||
TEGRASOC_WHICH_CLK_SPPLL0_DIV27,
|
||||
TEGRASOC_WHICH_CLK_SPPLL1_VCO,
|
||||
TEGRASOC_WHICH_CLK_SPPLL1_CLKOUTPN,
|
||||
TEGRASOC_WHICH_CLK_SPPLL1_DIV27,
|
||||
TEGRASOC_WHICH_CLK_VPLL0_REF,
|
||||
TEGRASOC_WHICH_CLK_VPLL0,
|
||||
TEGRASOC_WHICH_CLK_VPLL1,
|
||||
@@ -132,7 +132,7 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_DSI_PIXEL,
|
||||
TEGRASOC_WHICH_CLK_PRE_SOR0,
|
||||
TEGRASOC_WHICH_CLK_PRE_SOR1,
|
||||
TEGRASOC_WHICH_CLK_DP_LINK_REF,
|
||||
TEGRASOC_WHICH_CLK_DP_LINKA_REF,
|
||||
TEGRASOC_WHICH_CLK_SOR_LINKA_INPUT,
|
||||
TEGRASOC_WHICH_CLK_SOR_LINKA_AFIFO,
|
||||
TEGRASOC_WHICH_CLK_SOR_LINKA_AFIFO_M,
|
||||
@@ -143,7 +143,7 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_PLLHUB,
|
||||
TEGRASOC_WHICH_CLK_SOR0,
|
||||
TEGRASOC_WHICH_CLK_SOR1,
|
||||
TEGRASOC_WHICH_CLK_SOR_PAD_INPUT,
|
||||
TEGRASOC_WHICH_CLK_SOR_PADA_INPUT,
|
||||
TEGRASOC_WHICH_CLK_PRE_SF0,
|
||||
TEGRASOC_WHICH_CLK_SF0,
|
||||
TEGRASOC_WHICH_CLK_SF1,
|
||||
@@ -332,7 +332,9 @@ typedef struct nv_soc_irq_info_s {
|
||||
|
||||
#define NV_MAX_SOC_IRQS 6
|
||||
#define NV_MAX_DPAUX_NUM_DEVICES 4
|
||||
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2 // From SOC_DEV_MAPPING
|
||||
|
||||
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
|
||||
|
||||
|
||||
#define NV_IGPU_LEGACY_STALL_IRQ 70
|
||||
#define NV_IGPU_MAX_STALL_IRQS 3
|
||||
@@ -495,12 +497,6 @@ typedef struct nv_state_t
|
||||
} iommus;
|
||||
} nv_state_t;
|
||||
|
||||
// These defines need to be in sync with the defines in system.h
|
||||
#define OS_TYPE_LINUX 0x1
|
||||
#define OS_TYPE_FREEBSD 0x2
|
||||
#define OS_TYPE_SUNOS 0x3
|
||||
#define OS_TYPE_VMWARE 0x4
|
||||
|
||||
#define NVFP_TYPE_NONE 0x0
|
||||
#define NVFP_TYPE_REFCOUNTED 0x1
|
||||
#define NVFP_TYPE_REGISTERED 0x2
|
||||
@@ -609,6 +605,15 @@ typedef enum
|
||||
NV_POWER_STATE_RUNNING
|
||||
} nv_power_state_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const char *vidmem_power_status;
|
||||
const char *dynamic_power_status;
|
||||
const char *gc6_support;
|
||||
const char *gcoff_support;
|
||||
const char *s0ix_status;
|
||||
} nv_power_info_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
|
||||
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
|
||||
@@ -778,7 +783,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
@@ -822,6 +827,7 @@ void NV_API_CALL nv_acpi_methods_init (NvU32 *);
|
||||
void NV_API_CALL nv_acpi_methods_uninit (void);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_acpi_method (NvU32, NvU32, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
|
||||
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port (nv_state_t *, NvU8 *, NvU32, NvU32, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_acpi_dsm_method (nv_state_t *, NvU8 *, NvU32, NvBool, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
|
||||
NV_STATUS NV_API_CALL nv_acpi_ddc_method (nv_state_t *, void *, NvU32 *, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_acpi_dod_method (nv_state_t *, NvU32 *, NvU32 *);
|
||||
@@ -883,8 +889,6 @@ void NV_API_CALL nv_cap_drv_exit(void);
|
||||
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
|
||||
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
|
||||
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
|
||||
|
||||
@@ -990,10 +994,10 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
|
||||
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
@@ -1027,9 +1031,7 @@ void NV_API_CALL rm_enable_dynamic_power_management(nvidia_stack_t *, nv_s
|
||||
NV_STATUS NV_API_CALL rm_ref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
|
||||
void NV_API_CALL rm_unref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
|
||||
NV_STATUS NV_API_CALL rm_transition_dynamic_power(nvidia_stack_t *, nv_state_t *, NvBool, NvBool *);
|
||||
const char* NV_API_CALL rm_get_vidmem_power_status(nvidia_stack_t *, nv_state_t *);
|
||||
const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *, nv_state_t *);
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
void NV_API_CALL rm_get_power_info(nvidia_stack_t *, nv_state_t *, nv_power_info_t *);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
@@ -1041,13 +1043,12 @@ NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
|
||||
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
@@ -1078,6 +1079,9 @@ NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t
|
||||
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
|
||||
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
|
||||
|
||||
// Host1x specific functions.
|
||||
NV_STATUS NV_API_CALL nv_get_syncpoint_aperture(NvU32, NvU64 *, NvU64 *, NvU32 *);
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
|
||||
static inline NvU64 nv_rdtsc(void)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -1505,23 +1505,35 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
|
||||
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslUpdateContext
|
||||
nvUvmInterfaceCslRotateKey
|
||||
|
||||
Updates a context after a key rotation event and can only be called once per
|
||||
key rotation event. Following a key rotation event, and before
|
||||
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
|
||||
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
|
||||
Disables channels and rotates keys.
|
||||
|
||||
Locking: This function acquires an API lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
This function disables channels and rotates associated keys. The channels
|
||||
associated with the given CSL contexts must be idled before this function is
|
||||
called. To trigger key rotation all allocated channels for a given key must
|
||||
be present in the list. If the function returns successfully then the CSL
|
||||
contexts have been updated with the new key.
|
||||
|
||||
Locking: This function attempts to acquire the GPU lock. In case of failure
|
||||
to acquire the return code is NV_ERR_STATE_IN_USE. The caller must
|
||||
guarantee that no CSL function, including this one, is invoked
|
||||
concurrently with the CSL contexts in contextList.
|
||||
Memory : This function dynamically allocates memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN] - The CSL context associated with a channel.
|
||||
|
||||
contextList[IN/OUT] - An array of pointers to CSL contexts.
|
||||
contextListCount[IN] - Number of CSL contexts in contextList. Its value
|
||||
must be greater than 0.
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
|
||||
NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
|
||||
NV_ERR_STATE_IN_USE - Unable to acquire lock / resource. Caller
|
||||
can retry at a later time.
|
||||
NV_ERR_GENERIC - A failure other than _STATE_IN_USE occurred
|
||||
when attempting to acquire a lock.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
|
||||
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
|
||||
NvU32 contextListCount);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslRotateIv
|
||||
@@ -1529,17 +1541,13 @@ NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
|
||||
Rotates the IV for a given channel and operation.
|
||||
|
||||
This function will rotate the IV on both the CPU and the GPU.
|
||||
Outstanding messages that have been encrypted by the GPU should first be
|
||||
decrypted before calling this function with operation equal to
|
||||
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
|
||||
encrypted by the CPU should first be decrypted before calling this function
|
||||
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
|
||||
the channel must be idle before calling this function. This function can be
|
||||
called regardless of the value of the IV's message counter.
|
||||
For a given operation the channel must be idle before calling this function.
|
||||
This function can be called regardless of the value of the IV's message counter.
|
||||
|
||||
Locking: This function attempts to acquire the GPU lock.
|
||||
In case of failure to acquire the return code
|
||||
is NV_ERR_STATE_IN_USE.
|
||||
Locking: This function attempts to acquire the GPU lock. In case of failure to
|
||||
acquire the return code is NV_ERR_STATE_IN_USE. The caller must guarantee
|
||||
that no CSL function, including this one, is invoked concurrently with
|
||||
the same CSL context.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
@@ -1573,8 +1581,8 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
|
||||
However, it is optional. If it is NULL, the next IV in line will be used.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
The caller must guarantee that no CSL function, including this one,
|
||||
is invoked concurrently with the same CSL context.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
@@ -1610,9 +1618,14 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
|
||||
maximized when the input and output buffers are 16-byte aligned. This is
|
||||
natural alignment for AES block.
|
||||
|
||||
During a key rotation event the previous key is stored in the CSL context.
|
||||
This allows data encrypted by the GPU to be decrypted with the previous key.
|
||||
The keyRotationId parameter identifies which key is used. The first key rotation
|
||||
ID has a value of 0 that increments by one for each key rotation event.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
The caller must guarantee that no CSL function, including this one,
|
||||
is invoked concurrently with the same CSL context.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
@@ -1622,6 +1635,8 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
|
||||
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
|
||||
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
|
||||
internal counter is used.
|
||||
keyRotationId[IN] - Specifies the key that is used for decryption.
|
||||
A value of NV_U32_MAX specifies the current key.
|
||||
inputBuffer[IN] - Address of ciphertext input buffer.
|
||||
outputBuffer[OUT] - Address of plaintext output buffer.
|
||||
addAuthData[IN] - Address of the plaintext additional authenticated data used to
|
||||
@@ -1642,6 +1657,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
NvU8 const *inputBuffer,
|
||||
UvmCslIv const *decryptIv,
|
||||
NvU32 keyRotationId,
|
||||
NvU8 *outputBuffer,
|
||||
NvU8 const *addAuthData,
|
||||
NvU32 addAuthDataSize,
|
||||
@@ -1656,8 +1672,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
|
||||
undefined behavior.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
The caller must guarantee that no CSL function, including this one,
|
||||
is invoked concurrently with the same CSL context.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
@@ -1685,8 +1701,8 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
The caller must guarantee that no CSL function, including this one,
|
||||
is invoked concurrently with the same CSL context.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
@@ -1711,8 +1727,8 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
|
||||
the returned IV can be used in nvUvmInterfaceCslDecrypt.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
The caller must guarantee that no CSL function, including this one,
|
||||
is invoked concurrently with the same CSL context.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
@@ -1734,28 +1750,41 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
|
||||
UvmCslIv *iv);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslLogExternalEncryption
|
||||
nvUvmInterfaceCslLogEncryption
|
||||
|
||||
Checks and logs information about non-CSL encryptions, such as those that
|
||||
originate from the GPU.
|
||||
Checks and logs information about encryptions associated with the given
|
||||
CSL context.
|
||||
|
||||
This function does not modify elements of the UvmCslContext.
|
||||
For contexts associated with channels, this function does not modify elements of
|
||||
the UvmCslContext, and must be called for every CPU/GPU encryption.
|
||||
|
||||
For the context associated with fault buffers, bufferSize can encompass multiple
|
||||
encryption invocations, and the UvmCslContext will be updated following a key
|
||||
rotation event.
|
||||
|
||||
In either case the IV remains unmodified after this function is called.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
The caller must guarantee that no CSL function, including this one,
|
||||
is invoked concurrently with the same CSL context.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[OUT] - The size of the buffer encrypted by the
|
||||
operation[IN] - If the CSL context is associated with a fault
|
||||
buffer, this argument is ignored. If it is
|
||||
associated with a channel, it must be either
|
||||
- UVM_CSL_OPERATION_ENCRYPT
|
||||
- UVM_CSL_OPERATION_DECRYPT
|
||||
bufferSize[IN] - The size of the buffer(s) encrypted by the
|
||||
external entity in units of bytes.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The encryption would cause a counter
|
||||
to overflow.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize);
|
||||
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
|
||||
UvmCslOperation operation,
|
||||
NvU32 bufferSize);
|
||||
|
||||
#endif // _NV_UVM_INTERFACE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -39,12 +39,13 @@
|
||||
// are multiple BIG page sizes in RM. These defines are used as flags to "0"
|
||||
// should be OK when user is not sure which pagesize allocation it wants
|
||||
//
|
||||
#define UVM_PAGE_SIZE_DEFAULT 0x0
|
||||
#define UVM_PAGE_SIZE_4K 0x1000
|
||||
#define UVM_PAGE_SIZE_64K 0x10000
|
||||
#define UVM_PAGE_SIZE_128K 0x20000
|
||||
#define UVM_PAGE_SIZE_2M 0x200000
|
||||
#define UVM_PAGE_SIZE_512M 0x20000000
|
||||
#define UVM_PAGE_SIZE_DEFAULT 0x0ULL
|
||||
#define UVM_PAGE_SIZE_4K 0x1000ULL
|
||||
#define UVM_PAGE_SIZE_64K 0x10000ULL
|
||||
#define UVM_PAGE_SIZE_128K 0x20000ULL
|
||||
#define UVM_PAGE_SIZE_2M 0x200000ULL
|
||||
#define UVM_PAGE_SIZE_512M 0x20000000ULL
|
||||
#define UVM_PAGE_SIZE_256G 0x4000000000ULL
|
||||
|
||||
//
|
||||
// When modifying flags, make sure they are compatible with the mirrored
|
||||
@@ -267,6 +268,7 @@ typedef struct UvmGpuChannelInfo_tag
|
||||
|
||||
// The errorNotifier is filled out when the channel hits an RC error.
|
||||
NvNotification *errorNotifier;
|
||||
NvNotification *keyRotationNotifier;
|
||||
|
||||
NvU32 hwRunlistId;
|
||||
NvU32 hwChannelId;
|
||||
@@ -292,13 +294,13 @@ typedef struct UvmGpuChannelInfo_tag
|
||||
|
||||
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
|
||||
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
|
||||
NvU64 gpFifoGpuVa;
|
||||
NvU64 gpPutGpuVa;
|
||||
NvU64 gpGetGpuVa;
|
||||
NvU64 gpFifoGpuVa;
|
||||
NvU64 gpPutGpuVa;
|
||||
NvU64 gpGetGpuVa;
|
||||
// GPU VA of work submission offset is needed in Confidential Computing
|
||||
// so CE channels can ring doorbell of other channels as required for
|
||||
// WLC/LCIC work submission
|
||||
NvU64 workSubmissionOffsetGpuVa;
|
||||
NvU64 workSubmissionOffsetGpuVa;
|
||||
} UvmGpuChannelInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -394,6 +396,7 @@ typedef enum
|
||||
UVM_LINK_TYPE_NVLINK_2,
|
||||
UVM_LINK_TYPE_NVLINK_3,
|
||||
UVM_LINK_TYPE_NVLINK_4,
|
||||
UVM_LINK_TYPE_NVLINK_5,
|
||||
UVM_LINK_TYPE_C2C,
|
||||
} UVM_LINK_TYPE;
|
||||
|
||||
@@ -565,11 +568,6 @@ typedef struct UvmGpuP2PCapsParams_tag
|
||||
// second, not taking into account the protocols overhead. The reported
|
||||
// bandwidth for indirect peers is zero.
|
||||
NvU32 totalLinkLineRateMBps;
|
||||
|
||||
// Out: True if the peers have an indirect link to communicate. On P9
|
||||
// systems, this is true if peers are connected to different NPUs that
|
||||
// forward the requests between them.
|
||||
NvU32 indirectAccess : 1;
|
||||
} UvmGpuP2PCapsParams;
|
||||
|
||||
// Platform-wide information
|
||||
@@ -604,6 +602,8 @@ typedef struct UvmGpuConfComputeCaps_tag
|
||||
{
|
||||
// Out: GPU's confidential compute mode
|
||||
UvmGpuConfComputeMode mode;
|
||||
// Is key rotation enabled for UVM keys
|
||||
NvBool bKeyRotationEnabled;
|
||||
} UvmGpuConfComputeCaps;
|
||||
|
||||
#define UVM_GPU_NAME_LENGTH 0x40
|
||||
@@ -706,6 +706,13 @@ typedef struct UvmGpuInfo_tag
|
||||
|
||||
// EGM base address to offset in the GMMU PTE entry for EGM mappings
|
||||
NvU64 egmBaseAddr;
|
||||
|
||||
// If connectedToSwitch is NV_TRUE,
|
||||
// nvswitchEgmMemoryWindowStart tells the base address for the GPU's EGM memory in the
|
||||
// NVSwitch address space. It is used when creating PTEs of GPU memory mappings
|
||||
// to NVSwitch peers.
|
||||
NvU64 nvswitchEgmMemoryWindowStart;
|
||||
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@@ -1086,4 +1093,21 @@ typedef enum UvmCslOperation
|
||||
UVM_CSL_OPERATION_DECRYPT
|
||||
} UvmCslOperation;
|
||||
|
||||
typedef enum UVM_KEY_ROTATION_STATUS {
|
||||
// Key rotation complete/not in progress
|
||||
UVM_KEY_ROTATION_STATUS_IDLE = 0,
|
||||
// RM is waiting for clients to report their channels are idle for key rotation
|
||||
UVM_KEY_ROTATION_STATUS_PENDING = 1,
|
||||
// Key rotation is in progress
|
||||
UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
|
||||
// Key rotation timeout failure, RM will RC non-idle channels.
|
||||
// UVM should never see this status value.
|
||||
UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
|
||||
// Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
|
||||
UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
|
||||
// Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
|
||||
UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
|
||||
UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
|
||||
} UVM_KEY_ROTATION_STATUS;
|
||||
|
||||
#endif // _NV_UVM_TYPES_H_
|
||||
|
||||
@@ -440,9 +440,9 @@ struct NvKmsLayerCapabilities {
|
||||
NvBool supportsWindowMode :1;
|
||||
|
||||
/*!
|
||||
* Whether layer supports HDR pipe.
|
||||
* Whether layer supports ICtCp pipe.
|
||||
*/
|
||||
NvBool supportsHDR :1;
|
||||
NvBool supportsICtCp :1;
|
||||
|
||||
|
||||
/*!
|
||||
|
||||
@@ -158,13 +158,17 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
|
||||
NvU32 hasVideoMemory;
|
||||
|
||||
NvU32 numDisplaySemaphores;
|
||||
|
||||
NvU8 genericPageKind;
|
||||
|
||||
NvBool supportsSyncpts;
|
||||
|
||||
NvBool requiresVrrSemaphores;
|
||||
} caps;
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@@ -210,18 +214,26 @@ struct NvKmsKapiStaticDisplayInfo {
|
||||
NvU32 headMask;
|
||||
};
|
||||
|
||||
struct NvKmsKapiSyncpt {
|
||||
struct NvKmsKapiSyncParams {
|
||||
union {
|
||||
struct {
|
||||
/*!
|
||||
* Possible syncpt use case in kapi.
|
||||
* For pre-syncpt, use only id and value
|
||||
* and for post-syncpt, use only fd.
|
||||
*/
|
||||
NvU32 preSyncptId;
|
||||
NvU32 preSyncptValue;
|
||||
} syncpt;
|
||||
|
||||
/*!
|
||||
* Possible syncpt use case in kapi.
|
||||
* For pre-syncpt, use only id and value
|
||||
* and for post-syncpt, use only fd.
|
||||
*/
|
||||
NvBool preSyncptSpecified;
|
||||
NvU32 preSyncptId;
|
||||
NvU32 preSyncptValue;
|
||||
struct {
|
||||
NvU32 index;
|
||||
} semaphore;
|
||||
} u;
|
||||
|
||||
NvBool postSyncptRequested;
|
||||
NvBool preSyncptSpecified;
|
||||
NvBool postSyncptRequested;
|
||||
NvBool semaphoreSpecified;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerConfig {
|
||||
@@ -231,7 +243,7 @@ struct NvKmsKapiLayerConfig {
|
||||
NvU8 surfaceAlpha;
|
||||
} compParams;
|
||||
struct NvKmsRRParams rrParams;
|
||||
struct NvKmsKapiSyncpt syncptParams;
|
||||
struct NvKmsKapiSyncParams syncParams;
|
||||
|
||||
struct {
|
||||
struct NvKmsHDRStaticMetadata val;
|
||||
@@ -319,7 +331,6 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
|
||||
struct {
|
||||
struct {
|
||||
NvBool specified;
|
||||
NvU32 depth;
|
||||
NvU32 start;
|
||||
NvU32 end;
|
||||
@@ -327,7 +338,6 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
} input;
|
||||
|
||||
struct {
|
||||
NvBool specified;
|
||||
NvBool enabled;
|
||||
struct NvKmsLutRamps *pRamps;
|
||||
} output;
|
||||
@@ -342,7 +352,8 @@ struct NvKmsKapiHeadRequestedConfig {
|
||||
NvBool modeChanged : 1;
|
||||
NvBool hdrInfoFrameChanged : 1;
|
||||
NvBool colorimetryChanged : 1;
|
||||
NvBool lutChanged : 1;
|
||||
NvBool ilutChanged : 1;
|
||||
NvBool olutChanged : 1;
|
||||
} flags;
|
||||
|
||||
struct NvKmsKapiCursorRequestedConfig cursorRequestedConfig;
|
||||
@@ -368,6 +379,8 @@ struct NvKmsKapiHeadReplyConfig {
|
||||
|
||||
struct NvKmsKapiModeSetReplyConfig {
|
||||
enum NvKmsFlipResult flipResult;
|
||||
NvBool vrrFlip;
|
||||
NvS32 vrrSemaphoreIndex;
|
||||
struct NvKmsKapiHeadReplyConfig
|
||||
headReplyConfig[NVKMS_KAPI_MAX_HEADS];
|
||||
};
|
||||
@@ -1410,6 +1423,87 @@ struct NvKmsKapiFunctionsTable {
|
||||
(
|
||||
NvKmsKapiSuspendResumeCallbackFunc *function
|
||||
);
|
||||
|
||||
/*!
|
||||
* Immediately initialize the specified display semaphore to the pending state.
|
||||
*
|
||||
* Must be called prior to applying a mode set that utilizes the specified
|
||||
* display semaphore for synchronization.
|
||||
*
|
||||
* \param [in] device The device which will utilize the semaphore.
|
||||
*
|
||||
* \param [in] semaphoreIndex Index of the desired semaphore within the
|
||||
* NVKMS semaphore pool. Must be less than
|
||||
* NvKmsKapiDeviceResourcesInfo::caps::numDisplaySemaphores
|
||||
* for the specified device.
|
||||
*/
|
||||
NvBool
|
||||
(*tryInitDisplaySemaphore)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 semaphoreIndex
|
||||
);
|
||||
|
||||
/*!
|
||||
* Immediately set the specified display semaphore to the displayable state.
|
||||
*
|
||||
* Must be called after \ref tryInitDisplaySemaphore to indicate a mode
|
||||
* configuration change that utilizes the specified display semaphore for
|
||||
* synchronization may proceed.
|
||||
*
|
||||
* \param [in] device The device which will utilize the semaphore.
|
||||
*
|
||||
* \param [in] semaphoreIndex Index of the desired semaphore within the
|
||||
* NVKMS semaphore pool. Must be less than
|
||||
* NvKmsKapiDeviceResourcesInfo::caps::numDisplaySemaphores
|
||||
* for the specified device.
|
||||
*/
|
||||
void
|
||||
(*signalDisplaySemaphore)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 semaphoreIndex
|
||||
);
|
||||
|
||||
/*!
|
||||
* Immediately cancel use of a display semaphore by resetting its value to
|
||||
* its initial state.
|
||||
*
|
||||
* This can be used by clients to restore a semaphore to a consistent state
|
||||
* when they have prepared it for use by previously calling
|
||||
* \ref tryInitDisplaySemaphore() on it, but are then prevented from
|
||||
* submitting the associated hardware operations to consume it due to the
|
||||
* subsequent failure of some software or hardware operation.
|
||||
*
|
||||
* \param [in] device The device which will utilize the semaphore.
|
||||
*
|
||||
* \param [in] semaphoreIndex Index of the desired semaphore within the
|
||||
* NVKMS semaphore pool. Must be less than
|
||||
* NvKmsKapiDeviceResourcesInfo::caps::numDisplaySemaphores
|
||||
* for the specified device.
|
||||
*/
|
||||
void
|
||||
(*cancelDisplaySemaphore)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 semaphoreIndex
|
||||
);
|
||||
|
||||
/*!
|
||||
* Signal the VRR semaphore at the specified index from the CPU.
|
||||
* If device does not support VRR semaphores, this is a no-op.
|
||||
* Returns true if the signal succeeds or is a no-op; otherwise returns false.
|
||||
*
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] index The VRR semaphore index to be signalled.
|
||||
*/
|
||||
NvBool
|
||||
(*signalVrrSemaphore)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvS32 index
|
||||
);
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -67,6 +67,9 @@ extern "C" {
|
||||
#define NVBIT64(b) NVBIT_TYPE(b, NvU64)
|
||||
#endif
|
||||
|
||||
//Concatenate 2 32bit values to a 64bit value
|
||||
// (hi) becomes bits 63:32 and (lo) is OR'ed into the low bits of the result.
// Both arguments are parenthesized so that an expression argument is
// evaluated as a whole before it is cast to NvU64.
#define NV_CONCAT_32_TO_64(hi, lo) ((((NvU64)(hi)) << 32) | ((NvU64)(lo)))
|
||||
|
||||
// Helper macros for 32-bit bitmasks
|
||||
#define NV_BITMASK32_ELEMENT_SIZE (sizeof(NvU32) << 3)
|
||||
#define NV_BITMASK32_IDX(chId) (((chId) & ~(0x1F)) >> 5)
|
||||
@@ -494,6 +497,23 @@ do \
|
||||
//
|
||||
// Evaluates to (2^n - 1). The shift is split into two steps of n/2 and
// (n+1)/2 bits so that n == 64 never shifts a 64-bit value by its full width.
// (n) is parenthesized so expression arguments such as (a + b) expand safely.
#define NV_TWO_N_MINUS_ONE(n) (((1ULL<<((n)/2))<<(((n)+1)/2))-1)
|
||||
|
||||
//
|
||||
// Create a 64b bitmask with n bits set
|
||||
// This is the same as ((1ULL<<n) - 1), but it doesn't overflow for n=64
|
||||
//
|
||||
// ...
|
||||
// n=-1, 0x0000000000000000
|
||||
// n=0, 0x0000000000000000
|
||||
// n=1, 0x0000000000000001
|
||||
// ...
|
||||
// n=63, 0x7FFFFFFFFFFFFFFF
|
||||
// n=64, 0xFFFFFFFFFFFFFFFF
|
||||
// n=65, 0xFFFFFFFFFFFFFFFF
|
||||
// n=66, 0xFFFFFFFFFFFFFFFF
|
||||
// ...
|
||||
//
|
||||
// (n) is parenthesized at every use so that expression arguments
// (e.g. NV_BITMASK64(a + b)) are compared and subtracted as a whole.
// Note that (n) is evaluated more than once; avoid arguments with side effects.
#define NV_BITMASK64(n) (((n)<1) ? 0ULL : (NV_U64_MAX>>(((n)>64) ? 0 : (64-(n)))))
|
||||
|
||||
#define DRF_READ_1WORD_BS(d,r,f,v) \
|
||||
((DRF_EXTENT_MW(NV##d##r##f)<8)?DRF_READ_1BYTE_BS(NV##d##r##f,(v)): \
|
||||
((DRF_EXTENT_MW(NV##d##r##f)<16)?DRF_READ_2BYTE_BS(NV##d##r##f,(v)): \
|
||||
@@ -574,6 +594,13 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
|
||||
n32 = BIT_IDX_32(LOWESTBIT(n32));\
|
||||
}
|
||||
|
||||
// Destructive operation on n64
|
||||
#define LOWESTBITIDX_64(n64) \
|
||||
{ \
|
||||
n64 = BIT_IDX_64(LOWESTBIT(n64));\
|
||||
}
|
||||
|
||||
|
||||
// Destructive operation on n32
|
||||
#define HIGHESTBITIDX_32(n32) \
|
||||
{ \
|
||||
@@ -918,6 +945,11 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
// (b) is parenthesized before masking with 0x3F so that an expression argument
// (e.g. a conditional expression) is reduced to its bit index within (hi) as a
// whole. The brace-block form is kept so existing call sites expand unchanged.
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( (b) & 0x3F ); }
|
||||
//
|
||||
// Clear the bit at pos (b) for U64 which is < 128.
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
// (b) is parenthesized before masking with 0x3F so that an expression argument
// (e.g. a conditional expression) is reduced to its bit index within (hi) as a
// whole. The brace-block form is kept so existing call sites expand unchanged.
#define NV_BIT_CLEAR_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) &= ~NVBIT64(b); else (hi) &= ~NVBIT64( (b) & 0x3F ); }
|
||||
|
||||
// Get the number of elements of the specified fixed-size array
|
||||
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -151,6 +151,10 @@ NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
|
||||
NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE, 0x0000007C, "PMU RPC error due to no queue slot available for this event")
|
||||
NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation not allowed as key rotation is in progress")
|
||||
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED, 0x0000007E, "Test-only code path not enabled")
|
||||
NV_STATUS_CODE(NV_ERR_SECURE_BOOT_FAILED, 0x0000007F, "GFW secure boot failed")
|
||||
NV_STATUS_CODE(NV_ERR_INSUFFICIENT_ZBC_ENTRY, 0x00000080, "No more ZBC entry for the client")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
|
||||
@@ -152,6 +152,12 @@ typedef signed short NvS16; /* -32768 to 32767 */
|
||||
(((NvU32)(c) & 0xff) << 8) | \
|
||||
(((NvU32)(d) & 0xff))))
|
||||
|
||||
// Macro to build an NvU64 from two DWORDS, listed from msb to lsb
|
||||
#define NvU64_BUILD(a, b) \
|
||||
((NvU64)( \
|
||||
(((NvU64)(a) & ~0U) << 32) | \
|
||||
(((NvU64)(b) & ~0U))))
|
||||
|
||||
#if NVTYPES_USE_STDINT
|
||||
typedef uint32_t NvV32; /* "void": enumerated or multiple fields */
|
||||
typedef uint32_t NvU32; /* 0 to 4294967295 */
|
||||
|
||||
@@ -151,6 +151,7 @@ void NV_API_CALL os_release_rwlock_read (void *);
|
||||
void NV_API_CALL os_release_rwlock_write (void *);
|
||||
NvBool NV_API_CALL os_semaphore_may_sleep (void);
|
||||
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
|
||||
NV_STATUS NV_API_CALL os_get_is_openrm (NvBool *);
|
||||
NvBool NV_API_CALL os_is_isr (void);
|
||||
NvBool NV_API_CALL os_pat_supported (void);
|
||||
void NV_API_CALL os_dump_stack (void);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -21,15 +21,19 @@
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GSP_ERROR_H
|
||||
#define GSP_ERROR_H
|
||||
#ifndef NV_MEMORY_AREA_H
|
||||
#define NV_MEMORY_AREA_H
|
||||
|
||||
// Definitions for GSP-RM to report errors to CPU-RM via mailbox
|
||||
#define NV_GSP_ERROR_CODE 7:0
|
||||
#define NV_GSP_ERROR_REASON 15:8
|
||||
#define NV_GSP_ERROR_TASK 23:16
|
||||
#define NV_GSP_ERROR_SKIPPED 27:24
|
||||
#define NV_GSP_ERROR_TAG 31:28
|
||||
#define NV_GSP_ERROR_TAG_VAL 0xE
|
||||
typedef struct MemoryRange
|
||||
{
|
||||
NvU64 start;
|
||||
NvU64 size;
|
||||
} MemoryRange;
|
||||
|
||||
#endif // GSP_ERROR_H
|
||||
typedef struct MemoryArea
|
||||
{
|
||||
MemoryRange *pRanges;
|
||||
NvU64 numRanges;
|
||||
} MemoryArea;
|
||||
|
||||
#endif /* NV_MEMORY_AREA_H */
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -103,14 +103,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_key(nvidia_stack_t *, UvmCslContext *[], NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU32, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU32);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -71,7 +71,7 @@ test_header_presence() {
|
||||
TEST_CFLAGS="-E -M $CFLAGS"
|
||||
|
||||
file="$1"
|
||||
file_define=NV_`echo $file | tr '/.\-a-z' '___A-Z'`_PRESENT
|
||||
file_define=NV_`echo $file | tr '/.-' '___' | tr 'a-z' 'A-Z'`_PRESENT
|
||||
|
||||
CODE="#include <$file>"
|
||||
|
||||
@@ -1416,6 +1416,42 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
bus_type_has_iommu_ops)
|
||||
#
|
||||
# Determine if 'bus_type' structure has a 'iommu_ops' field.
|
||||
#
|
||||
# This field was removed by commit 17de3f5fdd35 ("iommu: Retire bus ops")
|
||||
# in v6.8
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device.h>
|
||||
|
||||
int conftest_bus_type_has_iommu_ops(void) {
|
||||
return offsetof(struct bus_type, iommu_ops);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_BUS_TYPE_HAS_IOMMU_OPS" "" "types"
|
||||
;;
|
||||
|
||||
eventfd_signal_has_counter_arg)
|
||||
#
|
||||
# Determine if eventfd_signal() function has an additional 'counter' argument.
|
||||
#
|
||||
# This argument was removed by commit 3652117f8548 (eventfd: simplify
|
||||
# eventfd_signal()) in v6.8
|
||||
#
|
||||
CODE="
|
||||
#include <linux/eventfd.h>
|
||||
|
||||
void conftest_eventfd_signal_has_counter_arg(void) {
|
||||
struct eventfd_ctx *ctx;
|
||||
|
||||
eventfd_signal(ctx, 1);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_available)
|
||||
# Determine if the DRM subsystem is usable
|
||||
CODE="
|
||||
@@ -3096,6 +3132,22 @@ compile_test() {
|
||||
|
||||
;;
|
||||
|
||||
foll_longterm_present)
|
||||
#
|
||||
# Determine if FOLL_LONGTERM enum is present or not
|
||||
#
|
||||
# Added by commit 932f4a630a69 ("mm/gup: replace
|
||||
# get_user_pages_longterm() with FOLL_LONGTERM") in
|
||||
# v5.2
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
int foll_longterm = FOLL_LONGTERM;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
vfio_pin_pages_has_vfio_device_arg)
|
||||
#
|
||||
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
|
||||
@@ -5152,11 +5204,15 @@ compile_test() {
|
||||
# commit 49a3f51dfeee ("drm/gem: Use struct dma_buf_map in GEM
|
||||
# vmap ops and convert GEM backends") in v5.11.
|
||||
#
|
||||
# Note that the 'map' argument type is changed from 'struct dma_buf_map'
|
||||
# to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
|
||||
# to iosys-map") in v5.18.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_gem.h>
|
||||
int conftest_drm_gem_object_vmap_has_map_arg(
|
||||
struct drm_gem_object *obj, struct dma_buf_map *map) {
|
||||
return obj->funcs->vmap(obj, map);
|
||||
struct drm_gem_object *obj) {
|
||||
return obj->funcs->vmap(obj, NULL);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
|
||||
@@ -5196,25 +5252,23 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic"
|
||||
;;
|
||||
|
||||
unsafe_follow_pfn)
|
||||
follow_pfn)
|
||||
#
|
||||
# Determine if unsafe_follow_pfn() is present.
|
||||
# Determine if follow_pfn() is present.
|
||||
#
|
||||
# unsafe_follow_pfn() was added by commit 69bacee7f9ad
|
||||
# ("mm: Add unsafe_follow_pfn") in v5.13-rc1.
|
||||
#
|
||||
# Note: this commit never made it to the linux kernel, so
|
||||
# unsafe_follow_pfn() never existed.
|
||||
# follow_pfn() was added by commit 3b6748e2dd69
|
||||
# ("mm: introduce follow_pfn()") in v2.6.31-rc1, and removed
|
||||
# by commit 233eb0bf3b94 ("mm: remove follow_pfn")
|
||||
# from linux-next 233eb0bf3b94.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
void conftest_unsafe_follow_pfn(void) {
|
||||
unsafe_follow_pfn();
|
||||
void conftest_follow_pfn(void) {
|
||||
follow_pfn();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_UNSAFE_FOLLOW_PFN_PRESENT" "" "functions"
|
||||
compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_plane_atomic_check_has_atomic_state_arg)
|
||||
#
|
||||
# Determine if drm_plane_helper_funcs::atomic_check takes 'state'
|
||||
@@ -5500,7 +5554,8 @@ compile_test() {
|
||||
|
||||
of_dma_configure)
|
||||
#
|
||||
# Determine if of_dma_configure() function is present
|
||||
# Determine if of_dma_configure() function is present, and how
|
||||
# many arguments it takes.
|
||||
#
|
||||
# Added by commit 591c1ee465ce ("of: configure the platform
|
||||
# device dma parameters") in v3.16. However, it was a static,
|
||||
@@ -5510,17 +5565,69 @@ compile_test() {
|
||||
# commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
|
||||
# to help re-use") in v4.1.
|
||||
#
|
||||
CODE="
|
||||
# It subsequently began taking a third parameter with commit
|
||||
# 3d6ce86ee794 ("drivers: remove force dma flag from buses")
|
||||
# in v4.18.
|
||||
#
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
|
||||
#include <linux/of_device.h>
|
||||
#endif
|
||||
|
||||
void conftest_of_dma_configure(void)
|
||||
{
|
||||
of_dma_configure();
|
||||
}
|
||||
"
|
||||
" > conftest$$.c
|
||||
|
||||
compile_check_conftest "$CODE" "NV_OF_DMA_CONFIGURE_PRESENT" "" "functions"
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
|
||||
echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
|
||||
echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
|
||||
else
|
||||
echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
|
||||
#include <linux/of_device.h>
|
||||
#endif
|
||||
|
||||
void conftest_of_dma_configure(void) {
|
||||
of_dma_configure(NULL, NULL, false);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
|
||||
#include <linux/of_device.h>
|
||||
#endif
|
||||
|
||||
void conftest_of_dma_configure(void) {
|
||||
of_dma_configure(NULL, NULL);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
|
||||
icc_get)
|
||||
@@ -6741,12 +6848,45 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_syncobj_features_present)
|
||||
# Determine if DRIVER_SYNCOBJ and DRIVER_SYNCOBJ_TIMELINE DRM
|
||||
# driver features are present. Timeline DRM synchronization objects
|
||||
# may only be used if both of these are supported by the driver.
|
||||
#
|
||||
# DRIVER_SYNCOBJ_TIMELINE Added by commit 060cebb20cdb ("drm:
|
||||
# introduce a capability flag for syncobj timeline support") in
|
||||
# v5.2
|
||||
#
|
||||
# DRIVER_SYNCOBJ Added by commit e9083420bbac ("drm: introduce
|
||||
# sync objects (v4)") in v4.12
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
int features = DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_SYNCOBJ_FEATURES_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
stack_trace)
|
||||
# Determine if functions stack_trace_{save,print} are present.
|
||||
# Added by commit e9b98e162 ("stacktrace: Provide helpers for
|
||||
# common stack trace operations") in v5.2.
|
||||
CODE="
|
||||
#include <linux/stacktrace.h>
|
||||
void conftest_stack_trace(void) {
|
||||
stack_trace_save();
|
||||
stack_trace_print();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_STACK_TRACE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_unlocked_ioctl_flag_present)
|
||||
# Determine if DRM_UNLOCKED IOCTL flag is present.
|
||||
#
|
||||
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
|
||||
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
|
||||
# next-20231208.
|
||||
# locking for legacy ioctls and DRM_UNLOCKED") in v6.8.
|
||||
#
|
||||
# DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
|
||||
# commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.
|
||||
@@ -6762,6 +6902,94 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
fault_flag_remote_present)
|
||||
# Determine if FAULT_FLAG_REMOTE is present in the kernel, either
|
||||
# as a define or an enum
|
||||
#
|
||||
# FAULT_FLAG_REMOTE define added by Kernel commit 1b2ee1266ea6
|
||||
# ("mm/core: Do not enforce PKEY permissions on remote mm access")
|
||||
# in v4.6
|
||||
# FAULT_FLAG_REMOTE changed from define to enum by Kernel commit
|
||||
# da2f5eb3d344 ("mm/doc: turn fault flags into an enum") in v5.13
|
||||
# FAULT_FLAG_REMOTE moved from `mm.h` to `mm_types.h` by Kernel
|
||||
# commit 36090def7bad ("mm: move tlb_flush_pending inline helpers
|
||||
# to mm_inline.h") in v5.17
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
int fault_flag_remote = FAULT_FLAG_REMOTE;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MM_HAS_FAULT_FLAG_REMOTE" "" "types"
|
||||
;;
|
||||
|
||||
drm_framebuffer_obj_present)
|
||||
#
|
||||
# Determine if the drm_framebuffer struct has an obj member.
|
||||
#
|
||||
# Added by commit 4c3dbb2c312c ("drm: Add GEM backed framebuffer
|
||||
# library") in v4.14.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_FRAMEBUFFER_H_PRESENT)
|
||||
#include <drm/drm_framebuffer.h>
|
||||
#endif
|
||||
|
||||
int conftest_drm_framebuffer_obj_present(void) {
|
||||
return offsetof(struct drm_framebuffer, obj);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FRAMEBUFFER_OBJ_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
drm_color_ctm_3x4_present)
|
||||
# Determine if struct drm_color_ctm_3x4 is present.
|
||||
#
|
||||
# struct drm_color_ctm_3x4 was added by commit 6872a189be50
|
||||
# ("drm/amd/display: Add 3x4 CTM support for plane CTM") in v6.8.
|
||||
CODE="
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
struct drm_color_ctm_3x4 ctm;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_COLOR_CTM_3X4_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
drm_color_lut)
|
||||
# Determine if struct drm_color_lut is present.
|
||||
#
|
||||
# struct drm_color_lut was added by commit 5488dc16fde7
|
||||
# ("drm: introduce pipe color correction properties") in v4.6.
|
||||
CODE="
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
struct drm_color_lut lut;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_COLOR_LUT_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
drm_property_blob_put)
|
||||
#
|
||||
# Determine if function drm_property_blob_put() is present.
|
||||
#
|
||||
# Added by commit 6472e5090be7 ("drm: Introduce
|
||||
# drm_property_blob_{get,put}()") in v4.12, when it replaced
|
||||
# drm_property_unreference_blob().
|
||||
#
|
||||
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_PROPERTY_H_PRESENT)
|
||||
#include <drm/drm_property.h>
|
||||
#endif
|
||||
void conftest_drm_property_blob_put(void) {
|
||||
drm_property_blob_put();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_PROPERTY_BLOB_PUT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit. Please
|
||||
# avoid specifying -rc kernels, and only use SHAs that actually exist
|
||||
|
||||
@@ -28,6 +28,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
drm/drm_property.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
@@ -52,6 +53,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/dma-resv.h \
|
||||
soc/tegra/chip-id.h \
|
||||
soc/tegra/fuse.h \
|
||||
soc/tegra/fuse-helper.h \
|
||||
soc/tegra/tegra_bpmp.h \
|
||||
video/nv_internal.h \
|
||||
linux/platform/tegra/dce/dce-client-ipc.h \
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
{
|
||||
|
||||
unsigned i, j;
|
||||
const static unsigned attempts = 3;
|
||||
static const unsigned attempts = 3;
|
||||
struct task_struct *thread[3];
|
||||
|
||||
for (i = 0;; i++) {
|
||||
|
||||
@@ -42,12 +42,6 @@
|
||||
#include <drm/drm_atomic.h>
|
||||
#include <drm/drm_atomic_helper.h>
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#include <linux/nvhost.h>
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
#include <linux/host1x-next.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
|
||||
#include <drm/drm_color_mgmt.h>
|
||||
#endif
|
||||
@@ -176,12 +170,10 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
return;
|
||||
}
|
||||
|
||||
*req_config = (struct NvKmsKapiCursorRequestedConfig) {
|
||||
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,
|
||||
|
||||
.dstX = plane_state->crtc_x,
|
||||
.dstY = plane_state->crtc_y,
|
||||
};
|
||||
memset(req_config, 0, sizeof(*req_config));
|
||||
req_config->surface = to_nv_framebuffer(plane_state->fb)->pSurface;
|
||||
req_config->dstX = plane_state->crtc_x;
|
||||
req_config->dstY = plane_state->crtc_y;
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
|
||||
if (plane->blend_mode_property != NULL && plane->alpha_property != NULL) {
|
||||
@@ -266,7 +258,6 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
struct NvKmsKapiLayerConfig old_config = req_config->config;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
|
||||
@@ -275,24 +266,22 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
return 0;
|
||||
}
|
||||
|
||||
*req_config = (struct NvKmsKapiLayerRequestedConfig) {
|
||||
.config = {
|
||||
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,
|
||||
memset(req_config, 0, sizeof(*req_config));
|
||||
|
||||
/* Source values are 16.16 fixed point */
|
||||
.srcX = plane_state->src_x >> 16,
|
||||
.srcY = plane_state->src_y >> 16,
|
||||
.srcWidth = plane_state->src_w >> 16,
|
||||
.srcHeight = plane_state->src_h >> 16,
|
||||
req_config->config.surface = to_nv_framebuffer(plane_state->fb)->pSurface;
|
||||
|
||||
.dstX = plane_state->crtc_x,
|
||||
.dstY = plane_state->crtc_y,
|
||||
.dstWidth = plane_state->crtc_w,
|
||||
.dstHeight = plane_state->crtc_h,
|
||||
/* Source values are 16.16 fixed point */
|
||||
req_config->config.srcX = plane_state->src_x >> 16;
|
||||
req_config->config.srcY = plane_state->src_y >> 16;
|
||||
req_config->config.srcWidth = plane_state->src_w >> 16;
|
||||
req_config->config.srcHeight = plane_state->src_h >> 16;
|
||||
|
||||
.csc = old_config.csc
|
||||
},
|
||||
};
|
||||
req_config->config.dstX = plane_state->crtc_x;
|
||||
req_config->config.dstY = plane_state->crtc_y;
|
||||
req_config->config.dstWidth = plane_state->crtc_w;
|
||||
req_config->config.dstHeight = plane_state->crtc_h;
|
||||
|
||||
req_config->config.csc = old_config.csc;
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
@@ -396,49 +385,16 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
req_config->config.inputColorSpace =
|
||||
nv_drm_plane_state->input_colorspace;
|
||||
|
||||
req_config->config.syncptParams.preSyncptSpecified = false;
|
||||
req_config->config.syncptParams.postSyncptRequested = false;
|
||||
req_config->config.syncParams.preSyncptSpecified = false;
|
||||
req_config->config.syncParams.postSyncptRequested = false;
|
||||
req_config->config.syncParams.semaphoreSpecified = false;
|
||||
|
||||
if (plane_state->fence != NULL || nv_drm_plane_state->fd_user_ptr) {
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
if (to_nv_device(plane->dev)->supportsSyncpts) {
|
||||
req_config->config.syncParams.postSyncptRequested = true;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
int ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
req_config->config.syncptParams.preSyncptSpecified = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
req_config->config.syncptParams.postSyncptRequested = true;
|
||||
}
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
int ret = host1x_fence_extract(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
req_config->config.syncptParams.preSyncptSpecified = true;
|
||||
}
|
||||
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
req_config->config.syncptParams.postSyncptRequested = true;
|
||||
}
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
@@ -688,9 +644,7 @@ static int nv_drm_plane_atomic_set_property(
|
||||
to_nv_drm_plane_state(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
nv_drm_plane_state->fd_user_ptr = (void __user *)(uintptr_t)(val);
|
||||
return 0;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
@@ -863,7 +817,7 @@ __nv_drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
static inline bool nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
const struct NvKmsKapiHeadRequestedConfig *old,
|
||||
struct NvKmsKapiHeadRequestedConfig *new)
|
||||
{
|
||||
@@ -875,15 +829,41 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
* there is no change in new configuration yet with respect
|
||||
* to older one!
|
||||
*/
|
||||
*new = (struct NvKmsKapiHeadRequestedConfig) {
|
||||
.modeSetConfig = old->modeSetConfig,
|
||||
};
|
||||
memset(new, 0, sizeof(*new));
|
||||
new->modeSetConfig = old->modeSetConfig;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(old->layerRequestedConfig); i++) {
|
||||
new->layerRequestedConfig[i] = (struct NvKmsKapiLayerRequestedConfig) {
|
||||
.config = old->layerRequestedConfig[i].config,
|
||||
};
|
||||
new->layerRequestedConfig[i].config =
|
||||
old->layerRequestedConfig[i].config;
|
||||
}
|
||||
|
||||
if (old->modeSetConfig.lut.input.pRamps) {
|
||||
new->modeSetConfig.lut.input.pRamps =
|
||||
nv_drm_calloc(1, sizeof(*new->modeSetConfig.lut.input.pRamps));
|
||||
|
||||
if (!new->modeSetConfig.lut.input.pRamps) {
|
||||
return false;
|
||||
}
|
||||
*new->modeSetConfig.lut.input.pRamps =
|
||||
*old->modeSetConfig.lut.input.pRamps;
|
||||
}
|
||||
if (old->modeSetConfig.lut.output.pRamps) {
|
||||
new->modeSetConfig.lut.output.pRamps =
|
||||
nv_drm_calloc(1, sizeof(*new->modeSetConfig.lut.output.pRamps));
|
||||
|
||||
if (!new->modeSetConfig.lut.output.pRamps) {
|
||||
/*
|
||||
* new->modeSetConfig.lut.input.pRamps is either NULL or it was
|
||||
* just allocated
|
||||
*/
|
||||
nv_drm_free(new->modeSetConfig.lut.input.pRamps);
|
||||
new->modeSetConfig.lut.input.pRamps = NULL;
|
||||
return false;
|
||||
}
|
||||
*new->modeSetConfig.lut.output.pRamps =
|
||||
*old->modeSetConfig.lut.output.pRamps;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc_state *nv_drm_crtc_state_alloc(void)
|
||||
@@ -955,17 +935,24 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__drm_atomic_helper_crtc_duplicate_state(crtc, &nv_state->base);
|
||||
|
||||
INIT_LIST_HEAD(&nv_state->nv_flip->list_entry);
|
||||
INIT_LIST_HEAD(&nv_state->nv_flip->deferred_flip_list);
|
||||
|
||||
nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
&(to_nv_crtc_state(crtc->state)->req_config),
|
||||
&nv_state->req_config);
|
||||
/*
|
||||
* nv_drm_crtc_duplicate_req_head_modeset_config potentially allocates
|
||||
* nv_state->req_config.modeSetConfig.lut.{in,out}put.pRamps, so they should
|
||||
* be freed in any following failure paths.
|
||||
*/
|
||||
if (!nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
&(to_nv_crtc_state(crtc->state)->req_config),
|
||||
&nv_state->req_config)) {
|
||||
|
||||
nv_state->ilut_ramps = NULL;
|
||||
nv_state->olut_ramps = NULL;
|
||||
nv_drm_free(nv_state->nv_flip);
|
||||
nv_drm_free(nv_state);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__drm_atomic_helper_crtc_duplicate_state(crtc, &nv_state->base);
|
||||
|
||||
return &nv_state->base;
|
||||
}
|
||||
@@ -990,8 +977,8 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
|
||||
|
||||
__nv_drm_atomic_helper_crtc_destroy_state(crtc, &nv_state->base);
|
||||
|
||||
nv_drm_free(nv_state->ilut_ramps);
|
||||
nv_drm_free(nv_state->olut_ramps);
|
||||
nv_drm_free(nv_state->req_config.modeSetConfig.lut.input.pRamps);
|
||||
nv_drm_free(nv_state->req_config.modeSetConfig.lut.output.pRamps);
|
||||
|
||||
nv_drm_free(nv_state);
|
||||
}
|
||||
@@ -1074,94 +1061,82 @@ static int color_mgmt_config_set_luts(struct nv_drm_crtc_state *nv_crtc_state,
|
||||
* According to the comment in the Linux kernel's
|
||||
* drivers/gpu/drm/drm_color_mgmt.c, if either property is NULL, that LUT
|
||||
* needs to be changed to a linear LUT
|
||||
*
|
||||
* On failure, any LUT ramps allocated in this function are freed when the
|
||||
* subsequent atomic state cleanup calls nv_drm_atomic_crtc_destroy_state.
|
||||
*/
|
||||
|
||||
req_config->flags.lutChanged = NV_TRUE;
|
||||
if (crtc_state->degamma_lut) {
|
||||
struct drm_color_lut *degamma_lut = NULL;
|
||||
uint64_t degamma_len = 0;
|
||||
|
||||
nv_crtc_state->ilut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->ilut_ramps));
|
||||
if (!nv_crtc_state->ilut_ramps) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
if (!modeset_config->lut.input.pRamps) {
|
||||
modeset_config->lut.input.pRamps =
|
||||
nv_drm_calloc(1, sizeof(*modeset_config->lut.input.pRamps));
|
||||
if (!modeset_config->lut.input.pRamps) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
degamma_lut = (struct drm_color_lut *)crtc_state->degamma_lut->data;
|
||||
degamma_len = crtc_state->degamma_lut->length /
|
||||
sizeof(struct drm_color_lut);
|
||||
|
||||
if ((ret = color_mgmt_config_copy_lut(nv_crtc_state->ilut_ramps,
|
||||
if ((ret = color_mgmt_config_copy_lut(modeset_config->lut.input.pRamps,
|
||||
degamma_lut,
|
||||
degamma_len)) != 0) {
|
||||
goto fail;
|
||||
return ret;
|
||||
}
|
||||
|
||||
modeset_config->lut.input.specified = NV_TRUE;
|
||||
modeset_config->lut.input.depth = 30; /* specify the full LUT */
|
||||
modeset_config->lut.input.start = 0;
|
||||
modeset_config->lut.input.end = degamma_len - 1;
|
||||
modeset_config->lut.input.pRamps = nv_crtc_state->ilut_ramps;
|
||||
} else {
|
||||
/* setting input.end to 0 is equivalent to disabling the LUT, which
|
||||
* should be equivalent to a linear LUT */
|
||||
modeset_config->lut.input.specified = NV_TRUE;
|
||||
modeset_config->lut.input.depth = 30; /* specify the full LUT */
|
||||
modeset_config->lut.input.start = 0;
|
||||
modeset_config->lut.input.end = 0;
|
||||
modeset_config->lut.input.pRamps = NULL;
|
||||
|
||||
nv_drm_free(modeset_config->lut.input.pRamps);
|
||||
modeset_config->lut.input.pRamps = NULL;
|
||||
}
|
||||
req_config->flags.ilutChanged = NV_TRUE;
|
||||
|
||||
if (crtc_state->gamma_lut) {
|
||||
struct drm_color_lut *gamma_lut = NULL;
|
||||
uint64_t gamma_len = 0;
|
||||
|
||||
nv_crtc_state->olut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->olut_ramps));
|
||||
if (!nv_crtc_state->olut_ramps) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
if (!modeset_config->lut.output.pRamps) {
|
||||
modeset_config->lut.output.pRamps =
|
||||
nv_drm_calloc(1, sizeof(*modeset_config->lut.output.pRamps));
|
||||
if (!modeset_config->lut.output.pRamps) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
gamma_lut = (struct drm_color_lut *)crtc_state->gamma_lut->data;
|
||||
gamma_len = crtc_state->gamma_lut->length /
|
||||
sizeof(struct drm_color_lut);
|
||||
|
||||
if ((ret = color_mgmt_config_copy_lut(nv_crtc_state->olut_ramps,
|
||||
if ((ret = color_mgmt_config_copy_lut(modeset_config->lut.output.pRamps,
|
||||
gamma_lut,
|
||||
gamma_len)) != 0) {
|
||||
goto fail;
|
||||
return ret;
|
||||
}
|
||||
|
||||
modeset_config->lut.output.specified = NV_TRUE;
|
||||
modeset_config->lut.output.enabled = NV_TRUE;
|
||||
modeset_config->lut.output.pRamps = nv_crtc_state->olut_ramps;
|
||||
} else {
|
||||
/* disabling the output LUT should be equivalent to setting a linear
|
||||
* LUT */
|
||||
modeset_config->lut.output.specified = NV_TRUE;
|
||||
modeset_config->lut.output.enabled = NV_FALSE;
|
||||
|
||||
nv_drm_free(modeset_config->lut.output.pRamps);
|
||||
modeset_config->lut.output.pRamps = NULL;
|
||||
}
|
||||
req_config->flags.olutChanged = NV_TRUE;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
/* free allocated state */
|
||||
nv_drm_free(nv_crtc_state->ilut_ramps);
|
||||
nv_drm_free(nv_crtc_state->olut_ramps);
|
||||
|
||||
/* remove dangling pointers */
|
||||
nv_crtc_state->ilut_ramps = NULL;
|
||||
nv_crtc_state->olut_ramps = NULL;
|
||||
modeset_config->lut.input.pRamps = NULL;
|
||||
modeset_config->lut.output.pRamps = NULL;
|
||||
|
||||
/* prevent attempts at reading NULLs */
|
||||
modeset_config->lut.input.specified = NV_FALSE;
|
||||
modeset_config->lut.output.specified = NV_FALSE;
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
|
||||
|
||||
@@ -1186,9 +1161,6 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
struct NvKmsKapiHeadRequestedConfig *req_config =
|
||||
&nv_crtc_state->req_config;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
|
||||
struct nv_drm_device *nv_dev = to_nv_device(crtc_state->crtc->dev);
|
||||
#endif
|
||||
|
||||
if (crtc_state->mode_changed) {
|
||||
drm_mode_to_nvkms_display_mode(&crtc_state->mode,
|
||||
@@ -1232,13 +1204,6 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
|
||||
if (nv_dev->drmMasterChangedSinceLastAtomicCommit &&
|
||||
(crtc_state->degamma_lut ||
|
||||
crtc_state->ctm ||
|
||||
crtc_state->gamma_lut)) {
|
||||
|
||||
crtc_state->color_mgmt_changed = NV_TRUE;
|
||||
}
|
||||
if (crtc_state->color_mgmt_changed) {
|
||||
if ((ret = color_mgmt_config_set_luts(nv_crtc_state, req_config)) != 0) {
|
||||
return ret;
|
||||
@@ -1264,7 +1229,7 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
|
||||
|
||||
static void nv_drm_plane_install_properties(
|
||||
struct drm_plane *plane,
|
||||
NvBool supportsHDR)
|
||||
NvBool supportsICtCp)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
|
||||
@@ -1280,7 +1245,7 @@ static void nv_drm_plane_install_properties(
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
|
||||
if (supportsICtCp && nv_dev->nv_hdr_output_metadata_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
@@ -1466,7 +1431,7 @@ nv_drm_plane_create(struct drm_device *dev,
|
||||
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
|
||||
nv_drm_plane_install_properties(
|
||||
plane,
|
||||
pResInfo->supportsHDR[layer_idx]);
|
||||
pResInfo->supportsICtCp[layer_idx]);
|
||||
}
|
||||
|
||||
__nv_drm_plane_create_alpha_blending_properties(
|
||||
@@ -1689,7 +1654,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
|
||||
struct NvKmsKapiCrcs crc32;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -ENOENT;
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
@@ -1717,7 +1682,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
|
||||
struct NvKmsKapiCrcs crc32;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -ENOENT;
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
|
||||
@@ -129,9 +129,6 @@ struct nv_drm_crtc_state {
|
||||
*/
|
||||
struct NvKmsKapiHeadRequestedConfig req_config;
|
||||
|
||||
struct NvKmsLutRamps *ilut_ramps;
|
||||
struct NvKmsLutRamps *olut_ramps;
|
||||
|
||||
/**
|
||||
* @nv_flip:
|
||||
*
|
||||
|
||||
@@ -373,19 +373,15 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
if (nv_dev->supportsSyncpts) {
|
||||
nv_dev->nv_out_fence_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
|
||||
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
|
||||
if (nv_dev->nv_out_fence_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
nv_dev->nv_out_fence_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
|
||||
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
|
||||
if (nv_dev->nv_out_fence_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
@@ -434,7 +430,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
|
||||
struct NvKmsKapiAllocateDeviceParams allocateDeviceParams;
|
||||
struct NvKmsKapiDeviceResourcesInfo resInfo;
|
||||
#endif
|
||||
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
|
||||
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
|
||||
NvU64 kind;
|
||||
NvU64 gen;
|
||||
@@ -480,6 +476,22 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
/*
|
||||
* If fbdev is enabled, take modeset ownership now before other DRM clients
|
||||
* can take master (and thus NVKMS ownership).
|
||||
*/
|
||||
if (nv_drm_fbdev_module_param) {
|
||||
if (!nvKms->grabOwnership(pDevice)) {
|
||||
nvKms->freeDevice(pDevice);
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
nv_dev->hasFramebufferConsole = NV_TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
mutex_lock(&nv_dev->lock);
|
||||
|
||||
/* Set NvKmsKapiDevice */
|
||||
@@ -505,6 +517,12 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
nv_dev->semsurf_max_submitted_offset =
|
||||
resInfo.caps.semsurf.maxSubmittedOffset;
|
||||
|
||||
nv_dev->display_semaphores.count =
|
||||
resInfo.caps.numDisplaySemaphores;
|
||||
nv_dev->display_semaphores.next_index = 0;
|
||||
|
||||
nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;
|
||||
|
||||
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
|
||||
gen = nv_dev->pageKindGeneration;
|
||||
kind = nv_dev->genericPageKind;
|
||||
@@ -590,6 +608,15 @@ static void __nv_drm_unload(struct drm_device *dev)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Release modeset ownership if fbdev is enabled */
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
if (nv_dev->hasFramebufferConsole) {
|
||||
drm_atomic_helper_shutdown(dev);
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
}
|
||||
#endif
|
||||
|
||||
cancel_delayed_work_sync(&nv_dev->hotplug_event_work);
|
||||
mutex_lock(&nv_dev->lock);
|
||||
|
||||
@@ -652,7 +679,6 @@ static int __nv_drm_master_set(struct drm_device *dev,
|
||||
!nvKms->grabOwnership(nv_dev->pDevice)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
nv_dev->drmMasterChangedSinceLastAtomicCommit = NV_TRUE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -781,6 +807,14 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_get_drm_file_unique_id_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_drm_file_unique_id_params *params = data;
|
||||
params->id = (u64)(filep->driver_priv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
@@ -834,13 +868,18 @@ static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
|
||||
struct drm_connector *connector;
|
||||
struct nv_drm_connector *nv_connector;
|
||||
int ret = 0;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
// Importantly, drm_connector_lookup (with filep) will only return the
|
||||
// connector if we are master, a lessee with the connector, or not master at
|
||||
// all. It will return NULL if we are a lessee with other connectors.
|
||||
struct drm_connector *connector =
|
||||
nv_drm_connector_lookup(dev, filep, params->connectorId);
|
||||
struct nv_drm_connector *nv_connector;
|
||||
int ret = 0;
|
||||
connector = nv_drm_connector_lookup(dev, filep, params->connectorId);
|
||||
|
||||
if (!connector) {
|
||||
return -EINVAL;
|
||||
@@ -873,6 +912,11 @@ static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
|
||||
int ret = -EINVAL;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
|
||||
@@ -1085,6 +1129,10 @@ static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
|
||||
{
|
||||
struct drm_nvidia_grant_permissions_params *params = data;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (params->type == NV_DRM_PERMISSIONS_TYPE_MODESET) {
|
||||
return nv_drm_grant_modeset_permission(dev, params, filep);
|
||||
} else if (params->type == NV_DRM_PERMISSIONS_TYPE_SUB_OWNER) {
|
||||
@@ -1250,6 +1298,10 @@ static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
|
||||
{
|
||||
struct drm_nvidia_revoke_permissions_params *params = data;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (params->type == NV_DRM_PERMISSIONS_TYPE_MODESET) {
|
||||
if (!params->dpyId) {
|
||||
return -EINVAL;
|
||||
@@ -1279,6 +1331,17 @@ static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
|
||||
{
|
||||
_Static_assert(sizeof(filep->driver_priv) >= sizeof(u64),
|
||||
"filep->driver_priv can not hold an u64");
|
||||
static atomic64_t id = ATOMIC_INIT(0);
|
||||
|
||||
filep->driver_priv = (void *)atomic64_inc_return(&id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
|
||||
int lessee_id)
|
||||
@@ -1522,6 +1585,9 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DEV_INFO,
|
||||
nv_drm_get_dev_info_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DRM_FILE_UNIQUE_ID,
|
||||
nv_drm_get_drm_file_unique_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
|
||||
@@ -1604,6 +1670,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
.driver_features =
|
||||
#if defined(NV_DRM_DRIVER_PRIME_FLAG_PRESENT)
|
||||
DRIVER_PRIME |
|
||||
#endif
|
||||
#if defined(NV_DRM_SYNCOBJ_FEATURES_PRESENT)
|
||||
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE |
|
||||
#endif
|
||||
DRIVER_GEM | DRIVER_RENDER,
|
||||
|
||||
@@ -1615,14 +1684,14 @@ static struct drm_driver nv_drm_driver = {
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
|
||||
* conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
|
||||
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
*
|
||||
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made these helpers the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
|
||||
* them if the helpers aren't present.
|
||||
* Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
|
||||
* import/export for all drivers") made these helpers the default when
|
||||
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
|
||||
* to just skip specifying them if the helpers aren't present.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
@@ -1656,6 +1725,7 @@ static struct drm_driver nv_drm_driver = {
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
.postclose = nv_drm_postclose,
|
||||
#endif
|
||||
.open = nv_drm_open,
|
||||
|
||||
.fops = &nv_drm_fops,
|
||||
|
||||
@@ -1714,6 +1784,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
struct nv_drm_device *nv_dev = NULL;
|
||||
struct drm_device *dev = NULL;
|
||||
struct device *device = gpu_info->os_device_ptr;
|
||||
bool bus_is_pci;
|
||||
|
||||
DRM_DEBUG(
|
||||
"Registering device for NVIDIA GPU ID 0x08%x",
|
||||
@@ -1747,7 +1818,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
dev->dev_private = nv_dev;
|
||||
nv_dev->dev = dev;
|
||||
|
||||
bool bus_is_pci =
|
||||
bus_is_pci =
|
||||
#if defined(NV_LINUX)
|
||||
device->bus == &pci_bus_type;
|
||||
#elif defined(NV_BSD)
|
||||
@@ -1771,11 +1842,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
if (nv_drm_fbdev_module_param &&
|
||||
drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
|
||||
if (!nvKms->grabOwnership(nv_dev->pDevice)) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
|
||||
goto failed_grab_ownership;
|
||||
}
|
||||
|
||||
if (bus_is_pci) {
|
||||
struct pci_dev *pdev = to_pci_dev(device);
|
||||
|
||||
@@ -1786,8 +1852,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
#endif
|
||||
}
|
||||
drm_fbdev_generic_setup(dev, 32);
|
||||
|
||||
nv_dev->hasFramebufferConsole = NV_TRUE;
|
||||
}
|
||||
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
|
||||
|
||||
@@ -1798,12 +1862,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
|
||||
return; /* Success */
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
failed_grab_ownership:
|
||||
|
||||
drm_dev_unregister(dev);
|
||||
#endif
|
||||
|
||||
failed_drm_register:
|
||||
|
||||
nv_drm_dev_free(dev);
|
||||
@@ -1870,12 +1928,6 @@ void nv_drm_remove_devices(void)
|
||||
struct nv_drm_device *next = dev_list->next;
|
||||
struct drm_device *dev = dev_list->dev;
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
if (dev_list->hasFramebufferConsole) {
|
||||
drm_atomic_helper_shutdown(dev);
|
||||
nvKms->releaseOwnership(dev_list->pDevice);
|
||||
}
|
||||
#endif
|
||||
drm_dev_unregister(dev);
|
||||
nv_drm_dev_free(dev);
|
||||
|
||||
@@ -1903,8 +1955,33 @@ void nv_drm_remove_devices(void)
|
||||
*/
|
||||
void nv_drm_suspend_resume(NvBool suspend)
|
||||
{
|
||||
static DEFINE_MUTEX(nv_drm_suspend_mutex);
|
||||
static NvU32 nv_drm_suspend_count = 0;
|
||||
struct nv_drm_device *nv_dev;
|
||||
|
||||
mutex_lock(&nv_drm_suspend_mutex);
|
||||
|
||||
/*
|
||||
* Count the number of times the driver is asked to suspend. Suspend all DRM
|
||||
* devices on the first suspend call and resume them on the last resume
|
||||
* call. This is necessary because the kernel may call nvkms_suspend()
|
||||
* simultaneously for each GPU, but NVKMS itself also suspends all GPUs on
|
||||
* the first call.
|
||||
*/
|
||||
if (suspend) {
|
||||
if (nv_drm_suspend_count++ > 0) {
|
||||
goto done;
|
||||
}
|
||||
} else {
|
||||
BUG_ON(nv_drm_suspend_count == 0);
|
||||
|
||||
if (--nv_drm_suspend_count > 0) {
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
struct nv_drm_device *nv_dev = dev_list;
|
||||
nv_dev = dev_list;
|
||||
|
||||
/*
|
||||
* NVKMS shuts down all heads on suspend. Update DRM state accordingly.
|
||||
@@ -1930,6 +2007,9 @@ void nv_drm_suspend_resume(NvBool suspend)
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
done:
|
||||
mutex_unlock(&nv_drm_suspend_mutex);
|
||||
}
|
||||
|
||||
#endif /* NV_DRM_AVAILABLE */
|
||||
|
||||
@@ -293,14 +293,12 @@ __nv_drm_prime_fence_context_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
|
||||
.base.ops = &nv_drm_prime_fence_context_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.base.fenceSemIndex = p->index,
|
||||
.pSemSurface = pSemSurface,
|
||||
.pLinearAddress = pLinearAddress,
|
||||
};
|
||||
nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
|
||||
nv_prime_fence_context->base.nv_dev = nv_dev;
|
||||
nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
|
||||
nv_prime_fence_context->base.fenceSemIndex = p->index;
|
||||
nv_prime_fence_context->pSemSurface = pSemSurface;
|
||||
nv_prime_fence_context->pLinearAddress = pLinearAddress;
|
||||
|
||||
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
|
||||
|
||||
@@ -465,10 +463,15 @@ int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_prime_fence_context_create_params *p = data;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
__nv_drm_prime_fence_context_new(nv_dev, p);
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context;
|
||||
int err;
|
||||
|
||||
if (nv_dev->pDevice == NULL) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
nv_prime_fence_context = __nv_drm_prime_fence_context_new(nv_dev, p);
|
||||
|
||||
if (!nv_prime_fence_context) {
|
||||
goto done;
|
||||
}
|
||||
@@ -523,6 +526,11 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
struct nv_drm_fence_context *nv_fence_context;
|
||||
nv_dma_fence_t *fence;
|
||||
|
||||
if (nv_dev->pDevice == NULL) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (p->__pad != 0) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
|
||||
goto done;
|
||||
@@ -1261,18 +1269,16 @@ __nv_drm_semsurf_fence_ctx_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*ctx = (struct nv_drm_semsurf_fence_ctx) {
|
||||
.base.ops = &nv_drm_semsurf_fence_ctx_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.base.fenceSemIndex = p->index,
|
||||
.pSemSurface = pSemSurface,
|
||||
.pSemMapping.pVoid = semMapping,
|
||||
.pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping,
|
||||
.callback.local = NULL,
|
||||
.callback.nvKms = NULL,
|
||||
.current_wait_value = 0,
|
||||
};
|
||||
ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
|
||||
ctx->base.nv_dev = nv_dev;
|
||||
ctx->base.context = nv_dma_fence_context_alloc(1);
|
||||
ctx->base.fenceSemIndex = p->index;
|
||||
ctx->pSemSurface = pSemSurface;
|
||||
ctx->pSemMapping.pVoid = semMapping;
|
||||
ctx->pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping;
|
||||
ctx->callback.local = NULL;
|
||||
ctx->callback.nvKms = NULL;
|
||||
ctx->current_wait_value = 0;
|
||||
|
||||
spin_lock_init(&ctx->lock);
|
||||
INIT_LIST_HEAD(&ctx->pending_fences);
|
||||
@@ -1312,6 +1318,10 @@ int nv_drm_semsurf_fence_ctx_create_ioctl(struct drm_device *dev,
|
||||
struct nv_drm_semsurf_fence_ctx *ctx;
|
||||
int err;
|
||||
|
||||
if (nv_dev->pDevice == NULL) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (p->__pad != 0) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
|
||||
return -EINVAL;
|
||||
@@ -1473,6 +1483,11 @@ int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev,
|
||||
int ret = -EINVAL;
|
||||
int fd;
|
||||
|
||||
if (nv_dev->pDevice == NULL) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (p->__pad != 0) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
|
||||
goto done;
|
||||
@@ -1635,6 +1650,10 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
|
||||
unsigned long flags;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (nv_dev->pDevice == NULL) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (p->pre_wait_value >= p->post_wait_value) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
@@ -1743,6 +1762,11 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
|
||||
nv_dma_fence_t *fence;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (nv_dev->pDevice == NULL) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto done;
|
||||
}
|
||||
|
||||
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
|
||||
|
||||
if (!nv_gem) {
|
||||
|
||||
@@ -380,7 +380,7 @@ int nv_drm_gem_import_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
int ret;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
ret = -EINVAL;
|
||||
ret = -EOPNOTSUPP;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
@@ -430,7 +430,7 @@ int nv_drm_gem_export_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
int ret = 0;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
ret = -EINVAL;
|
||||
ret = -EOPNOTSUPP;
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -483,7 +483,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
|
||||
int ret = 0;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
ret = -EINVAL;
|
||||
ret = -EOPNOTSUPP;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
@@ -551,14 +551,12 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
const struct nv_drm_device *nv_dev_src;
|
||||
const struct nv_drm_gem_nvkms_memory *nv_nvkms_memory_src;
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory;
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
|
||||
BUG_ON(nv_gem_src == NULL || nv_gem_src->ops != &nv_gem_nvkms_memory_ops);
|
||||
|
||||
nv_dev_src = to_nv_device(nv_gem_src->base.dev);
|
||||
nv_nvkms_memory_src = to_nv_nvkms_memory_const(nv_gem_src);
|
||||
|
||||
if ((nv_nvkms_memory =
|
||||
nv_drm_calloc(1, sizeof(*nv_nvkms_memory))) == NULL) {
|
||||
|
||||
@@ -319,7 +319,7 @@ int nv_drm_gem_identify_object_ioctl(struct drm_device *dev,
|
||||
struct nv_drm_gem_object *nv_gem = NULL;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
return -EINVAL;
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
nv_dma_buf = nv_drm_gem_object_dma_buf_lookup(dev, filep, p->handle);
|
||||
|
||||
@@ -45,8 +45,7 @@
|
||||
|
||||
/*
|
||||
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
|
||||
* 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
|
||||
* drm_crtc.h") in linux-next, expected in v5.19-rc7.
|
||||
* 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") in v6.0.
|
||||
*
|
||||
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
|
||||
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it
|
||||
|
||||
@@ -613,8 +613,8 @@ static inline int nv_drm_format_num_planes(uint32_t format)
|
||||
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
|
||||
|
||||
/*
|
||||
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
|
||||
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
|
||||
* DRM_UNLOCKED was removed with commit 2798ffcc1d6a ("drm: Remove locking for
|
||||
* legacy ioctls and DRM_UNLOCKED") in v6.8, but it was previously made
|
||||
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
|
||||
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
|
||||
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
#define DRM_NVIDIA_SEMSURF_FENCE_CREATE 0x15
|
||||
#define DRM_NVIDIA_SEMSURF_FENCE_WAIT 0x16
|
||||
#define DRM_NVIDIA_SEMSURF_FENCE_ATTACH 0x17
|
||||
#define DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID 0x18
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
|
||||
@@ -157,6 +158,11 @@
|
||||
DRM_NVIDIA_SEMSURF_FENCE_ATTACH), \
|
||||
struct drm_nvidia_semsurf_fence_attach_params)
|
||||
|
||||
/*
 * ioctl request built with DRM_IOWR from DRM_COMMAND_BASE plus the
 * DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID command number; its argument is a
 * struct drm_nvidia_get_drm_file_unique_id_params.
 */
#define DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + \
|
||||
DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID), \
|
||||
struct drm_nvidia_get_drm_file_unique_id_params)
|
||||
|
||||
struct drm_nvidia_gem_import_nvkms_memory_params {
|
||||
uint64_t mem_size; /* IN */
|
||||
|
||||
@@ -385,4 +391,8 @@ struct drm_nvidia_semsurf_fence_attach_params {
|
||||
uint64_t wait_value; /* IN Semaphore value to reach before signal */
|
||||
};
|
||||
|
||||
/*
 * Argument block for DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID.
 * The single field is an output filled in for the caller.
 */
struct drm_nvidia_get_drm_file_unique_id_params {
|
||||
uint64_t id; /* OUT Unique ID of the DRM file */
|
||||
};
|
||||
|
||||
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
|
||||
|
||||
@@ -42,6 +42,16 @@
|
||||
#include <drm/drm_atomic_helper.h>
|
||||
#include <drm/drm_crtc.h>
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#include <linux/nvhost.h>
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
#include <linux/host1x-next.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
#include "nvidia-dma-fence-helper.h"
|
||||
#endif
|
||||
|
||||
struct nv_drm_atomic_state {
|
||||
struct NvKmsKapiRequestedModeSetConfig config;
|
||||
struct drm_atomic_state base;
|
||||
@@ -146,6 +156,165 @@ static int __nv_drm_put_back_post_fence_fd(
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
/*
 * Per-fence bookkeeping handed to __nv_drm_plane_fence_cb().
 *
 * The callback recovers this structure from the embedded dma_fence_cb
 * member via container_of(), so that member must stay embedded by value.
 */
struct nv_drm_plane_fence_cb_data {
|
||||
/* Callback node registered with nv_dma_fence_add_callback(). */
nv_dma_fence_cb_t dma_fence_cb;
|
||||
/* Device whose pDevice is passed to signalDisplaySemaphore(). */
struct nv_drm_device *nv_dev;
|
||||
/* Index of the display semaphore to signal when the fence fires. */
NvU32 semaphore_index;
|
||||
};
|
||||
|
||||
static void
|
||||
__nv_drm_plane_fence_cb(
|
||||
nv_dma_fence_t *fence,
|
||||
nv_dma_fence_cb_t *cb_data
|
||||
)
|
||||
{
|
||||
struct nv_drm_plane_fence_cb_data *fence_data =
|
||||
container_of(cb_data, typeof(*fence_data), dma_fence_cb);
|
||||
struct nv_drm_device *nv_dev = fence_data->nv_dev;
|
||||
|
||||
nv_dma_fence_put(fence);
|
||||
nvKms->signalDisplaySemaphore(nv_dev->pDevice, fence_data->semaphore_index);
|
||||
nv_drm_free(fence_data);
|
||||
}
|
||||
|
||||
static int __nv_drm_convert_in_fences(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_atomic_state *state,
|
||||
struct drm_crtc *crtc,
|
||||
struct drm_crtc_state *crtc_state)
|
||||
{
|
||||
struct drm_plane *plane = NULL;
|
||||
struct drm_plane_state *plane_state = NULL;
|
||||
struct nv_drm_plane *nv_plane = NULL;
|
||||
struct NvKmsKapiLayerRequestedConfig *plane_req_config = NULL;
|
||||
struct NvKmsKapiHeadRequestedConfig *head_req_config =
|
||||
&to_nv_crtc_state(crtc_state)->req_config;
|
||||
struct nv_drm_plane_fence_cb_data *fence_data;
|
||||
uint32_t semaphore_index;
|
||||
uint32_t idx_count;
|
||||
int ret, i;
|
||||
|
||||
if (!crtc_state->active) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nv_drm_for_each_new_plane_in_state(state, plane, plane_state, i) {
|
||||
if ((plane->type == DRM_PLANE_TYPE_CURSOR) ||
|
||||
(plane_state->crtc != crtc) ||
|
||||
(plane_state->fence == NULL)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nv_plane = to_nv_plane(plane);
|
||||
plane_req_config =
|
||||
&head_req_config->layerRequestedConfig[nv_plane->layer_idx];
|
||||
|
||||
if (nv_dev->supportsSyncpts) {
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
int ret =
|
||||
nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&plane_req_config->config.syncParams.u.syncpt.preSyncptId,
|
||||
&plane_req_config->config.syncParams.u.syncpt.preSyncptValue);
|
||||
if (ret == 0) {
|
||||
plane_req_config->config.syncParams.preSyncptSpecified = true;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
int ret =
|
||||
host1x_fence_extract(
|
||||
plane_state->fence,
|
||||
&plane_req_config->config.syncParams.u.syncpt.preSyncptId,
|
||||
&plane_req_config->config.syncParams.u.syncpt.preSyncptValue);
|
||||
if (ret == 0) {
|
||||
plane_req_config->config.syncParams.preSyncptSpecified = true;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Syncpt extraction failed, or syncpts are not supported.
|
||||
* Use general DRM fence support with semaphores instead.
|
||||
*/
|
||||
if (plane_req_config->config.syncParams.postSyncptRequested) {
|
||||
// Can't mix Syncpts and semaphores in a given request.
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (idx_count = 0; idx_count < nv_dev->display_semaphores.count; idx_count++) {
|
||||
semaphore_index = nv_drm_next_display_semaphore(nv_dev);
|
||||
if (nvKms->tryInitDisplaySemaphore(nv_dev->pDevice, semaphore_index)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (idx_count == nv_dev->display_semaphores.count) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to initialize semaphore for plane fence");
|
||||
/*
|
||||
* This should only happen if the semaphore pool was somehow
|
||||
* exhausted. Waiting a bit and retrying may help in that case.
|
||||
*/
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
plane_req_config->config.syncParams.semaphoreSpecified = true;
|
||||
plane_req_config->config.syncParams.u.semaphore.index = semaphore_index;
|
||||
|
||||
fence_data = nv_drm_calloc(1, sizeof(*fence_data));
|
||||
|
||||
if (!fence_data) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to allocate callback data for plane fence");
|
||||
nvKms->cancelDisplaySemaphore(nv_dev->pDevice, semaphore_index);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
fence_data->nv_dev = nv_dev;
|
||||
fence_data->semaphore_index = semaphore_index;
|
||||
|
||||
ret = nv_dma_fence_add_callback(plane_state->fence,
|
||||
&fence_data->dma_fence_cb,
|
||||
__nv_drm_plane_fence_cb);
|
||||
|
||||
switch (ret) {
|
||||
case -ENOENT:
|
||||
/* The fence is already signaled */
|
||||
__nv_drm_plane_fence_cb(plane_state->fence,
|
||||
&fence_data->dma_fence_cb);
|
||||
#if defined(fallthrough)
|
||||
fallthrough;
|
||||
#else
|
||||
/* Fallthrough */
|
||||
#endif
|
||||
case 0:
|
||||
/*
|
||||
* The plane state's fence reference has either been consumed or
|
||||
* belongs to the outstanding callback now.
|
||||
*/
|
||||
plane_state->fence = NULL;
|
||||
break;
|
||||
default:
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed plane fence callback registration");
|
||||
/* Fence callback registration failed */
|
||||
nvKms->cancelDisplaySemaphore(nv_dev->pDevice, semaphore_index);
|
||||
nv_drm_free(fence_data);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
|
||||
|
||||
static int __nv_drm_get_syncpt_data(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_crtc *crtc,
|
||||
@@ -258,11 +427,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
commit ? crtc->state : crtc_state;
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
|
||||
requested_config->headRequestedConfig[nv_crtc->head] =
|
||||
to_nv_crtc_state(new_crtc_state)->req_config;
|
||||
|
||||
requested_config->headsMask |= 1 << nv_crtc->head;
|
||||
|
||||
if (commit) {
|
||||
struct drm_crtc_state *old_crtc_state = crtc_state;
|
||||
struct nv_drm_crtc_state *nv_new_crtc_state =
|
||||
@@ -282,7 +446,27 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
|
||||
nv_new_crtc_state->nv_flip = NULL;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
ret = __nv_drm_convert_in_fences(nv_dev,
|
||||
state,
|
||||
crtc,
|
||||
new_crtc_state);
|
||||
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
|
||||
}
|
||||
|
||||
/*
|
||||
* Do this deep copy after calling __nv_drm_convert_in_fences,
|
||||
* which modifies the new CRTC state's req_config member
|
||||
*/
|
||||
requested_config->headRequestedConfig[nv_crtc->head] =
|
||||
to_nv_crtc_state(new_crtc_state)->req_config;
|
||||
|
||||
requested_config->headsMask |= 1 << nv_crtc->head;
|
||||
}
|
||||
|
||||
if (commit && nvKms->systemInfo.bAllowWriteCombining) {
|
||||
@@ -313,6 +497,10 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
}
|
||||
}
|
||||
|
||||
if (commit && nv_dev->requiresVrrSemaphores && reply_config.vrrFlip) {
|
||||
nvKms->signalVrrSemaphore(nv_dev->pDevice, reply_config.vrrSemaphoreIndex);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -506,7 +694,6 @@ int nv_drm_atomic_commit(struct drm_device *dev,
|
||||
|
||||
goto done;
|
||||
}
|
||||
nv_dev->drmMasterChangedSinceLastAtomicCommit = NV_FALSE;
|
||||
|
||||
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
@@ -587,6 +774,9 @@ int nv_drm_atomic_commit(struct drm_device *dev,
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Flip event timeout on head %u", nv_crtc->head);
|
||||
while (!list_empty(&nv_crtc->flip_list)) {
|
||||
__nv_drm_handle_flip_event(nv_crtc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,22 +147,18 @@ struct nv_drm_device {
|
||||
NvBool hasVideoMemory;
|
||||
|
||||
NvBool supportsSyncpts;
|
||||
NvBool requiresVrrSemaphores;
|
||||
NvBool subOwnershipGranted;
|
||||
NvBool hasFramebufferConsole;
|
||||
|
||||
/**
|
||||
* @drmMasterChangedSinceLastAtomicCommit:
|
||||
*
|
||||
* This flag is set in nv_drm_master_set and reset after a completed atomic
|
||||
* commit. It is used to restore or recommit state that is lost by the
|
||||
* NvKms modeset owner change, such as the CRTC color management
|
||||
* properties.
|
||||
*/
|
||||
NvBool drmMasterChangedSinceLastAtomicCommit;
|
||||
|
||||
struct drm_property *nv_out_fence_property;
|
||||
struct drm_property *nv_input_colorspace_property;
|
||||
|
||||
struct {
|
||||
NvU32 count;
|
||||
NvU32 next_index;
|
||||
} display_semaphores;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
@@ -170,6 +166,19 @@ struct nv_drm_device {
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
static inline NvU32 nv_drm_next_display_semaphore(
|
||||
struct nv_drm_device *nv_dev)
|
||||
{
|
||||
NvU32 current_index = nv_dev->display_semaphores.next_index++;
|
||||
|
||||
if (nv_dev->display_semaphores.next_index >=
|
||||
nv_dev->display_semaphores.count) {
|
||||
nv_dev->display_semaphores.next_index = 0;
|
||||
}
|
||||
|
||||
return current_index;
|
||||
}
|
||||
|
||||
static inline struct nv_drm_device *to_nv_device(
|
||||
struct drm_device *dev)
|
||||
{
|
||||
|
||||
@@ -128,4 +128,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
{
|
||||
|
||||
unsigned i, j;
|
||||
const static unsigned attempts = 3;
|
||||
static const unsigned attempts = 3;
|
||||
struct task_struct *thread[3];
|
||||
|
||||
for (i = 0;; i++) {
|
||||
|
||||
@@ -56,7 +56,11 @@
|
||||
#include "nv-lock.h"
|
||||
#include "nv-chardev-numbers.h"
|
||||
|
||||
#if !defined(CONFIG_RETPOLINE)
|
||||
/*
|
||||
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
|
||||
* CONFIG_MITIGATION_RETPOLINE") in v6.8 renamed CONFIG_RETPOLINE.
|
||||
*/
|
||||
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
|
||||
#include "nv-retpoline.h"
|
||||
#endif
|
||||
|
||||
@@ -73,10 +77,10 @@ module_param_named(disable_hdmi_frl, disable_hdmi_frl, bool, 0400);
|
||||
static bool disable_vrr_memclk_switch = false;
|
||||
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);
|
||||
|
||||
static bool hdmi_deepcolor = false;
|
||||
static bool hdmi_deepcolor = true;
|
||||
module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);
|
||||
|
||||
static bool vblank_sem_control = false;
|
||||
static bool vblank_sem_control = true;
|
||||
module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
|
||||
|
||||
static bool opportunistic_display_sync = true;
|
||||
@@ -135,6 +139,20 @@ NvBool nvkms_opportunistic_display_sync(void)
|
||||
return opportunistic_display_sync;
|
||||
}
|
||||
|
||||
NvBool nvkms_kernel_supports_syncpts(void)
|
||||
{
|
||||
/*
|
||||
* Note this only checks that the kernel has the prerequisite
|
||||
* support for syncpts; callers must also check that the hardware
|
||||
* supports syncpts.
|
||||
*/
|
||||
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
|
||||
return NV_TRUE;
|
||||
#else
|
||||
return NV_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NVKMS_SYNCPT_STUBS_NEEDED
|
||||
|
||||
/*************************************************************************
|
||||
@@ -499,8 +517,9 @@ nvkms_event_queue_changed(nvkms_per_open_handle_t *pOpenKernel,
|
||||
|
||||
static void nvkms_suspend(NvU32 gpuId)
|
||||
{
|
||||
nvKmsKapiSuspendResume(NV_TRUE /* suspend */);
|
||||
|
||||
if (gpuId == 0) {
|
||||
nvKmsKapiSuspendResume(NV_TRUE /* suspend */);
|
||||
nvkms_write_lock_pm_lock();
|
||||
}
|
||||
|
||||
@@ -517,8 +536,9 @@ static void nvkms_resume(NvU32 gpuId)
|
||||
|
||||
if (gpuId == 0) {
|
||||
nvkms_write_unlock_pm_lock();
|
||||
nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
|
||||
}
|
||||
|
||||
nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
|
||||
}
|
||||
|
||||
|
||||
@@ -1228,6 +1248,26 @@ void nvkms_close_from_kapi(struct nvkms_per_open *popen)
|
||||
nvkms_close_pm_unlocked(popen);
|
||||
}
|
||||
|
||||
NvBool nvkms_ioctl_from_kapi_try_pmlock
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
NvU32 cmd, void *params_address, const size_t param_size
|
||||
)
|
||||
{
|
||||
NvBool ret;
|
||||
|
||||
if (nvkms_read_trylock_pm_lock()) {
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
ret = nvkms_ioctl_common(popen,
|
||||
cmd,
|
||||
(NvU64)(NvUPtr)params_address, param_size) == 0;
|
||||
nvkms_read_unlock_pm_lock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
NvBool nvkms_ioctl_from_kapi
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
|
||||
@@ -304,6 +304,11 @@ NvU32 nvkms_enumerate_gpus(nv_gpu_info_t *gpu_info);
|
||||
|
||||
NvBool nvkms_allow_write_combining(void);
|
||||
|
||||
/*!
|
||||
* Check if OS supports syncpoints.
|
||||
*/
|
||||
NvBool nvkms_kernel_supports_syncpts(void);
|
||||
|
||||
/*!
|
||||
* Checks whether the fd is associated with an nvidia character device.
|
||||
*/
|
||||
@@ -328,6 +333,16 @@ NvBool nvkms_ioctl_from_kapi
|
||||
NvU32 cmd, void *params_address, const size_t params_size
|
||||
);
|
||||
|
||||
/*!
|
||||
* Like nvkms_ioctl_from_kapi, but return NV_FALSE instead of waiting if the
|
||||
* power management read lock cannot be acquired.
|
||||
*/
|
||||
NvBool nvkms_ioctl_from_kapi_try_pmlock
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
NvU32 cmd, void *params_address, const size_t params_size
|
||||
);
|
||||
|
||||
/*!
|
||||
* APIs for locking.
|
||||
*/
|
||||
|
||||
@@ -105,3 +105,4 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
|
||||
|
||||
@@ -60,6 +60,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
|
||||
module_param(peerdirect_support, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
|
||||
|
||||
/*
 * Accepted values of the persistent_api_support module parameter.
 * The parameter defaults to NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT and is
 * exposed read-only (S_IRUGO).
 */
enum {
|
||||
NV_MEM_PERSISTENT_API_SUPPORT_LEGACY = 0,
|
||||
NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT = 1,
|
||||
};
|
||||
static int persistent_api_support = NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT;
|
||||
module_param(persistent_api_support, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(persistent_api_support, "Set level of support for persistent APIs, 0 [legacy] or 1 [default]");
|
||||
|
||||
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
|
||||
#ifdef NV_MEM_DEBUG
|
||||
@@ -479,32 +486,8 @@ static struct peer_memory_client nv_mem_client_nc = {
|
||||
.release = nv_mem_release,
|
||||
};
|
||||
|
||||
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
|
||||
|
||||
static int nv_mem_param_conf_check(void)
|
||||
static int nv_mem_legacy_client_init(void)
|
||||
{
|
||||
int rc = 0;
|
||||
switch (peerdirect_support) {
|
||||
case NV_MEM_PEERDIRECT_SUPPORT_DEFAULT:
|
||||
case NV_MEM_PEERDIRECT_SUPPORT_LEGACY:
|
||||
break;
|
||||
default:
|
||||
peer_err("invalid peerdirect_support param value %d\n", peerdirect_support);
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __init nv_mem_client_init(void)
|
||||
{
|
||||
int rc;
|
||||
rc = nv_mem_param_conf_check();
|
||||
if (rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
|
||||
// off by one, to leave space for the trailing '1' which is flagging
|
||||
// the new client type
|
||||
BUG_ON(strlen(DRV_NAME) > IB_PEER_MEMORY_NAME_MAX-1);
|
||||
@@ -533,19 +516,96 @@ static int __init nv_mem_client_init(void)
|
||||
&mem_invalidate_callback);
|
||||
if (!reg_handle) {
|
||||
peer_err("nv_mem_client_init -- error while registering traditional client\n");
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_mem_nc_client_init(void)
|
||||
{
|
||||
// The nc client enables support for persistent pages.
|
||||
if (persistent_api_support == NV_MEM_PERSISTENT_API_SUPPORT_LEGACY)
|
||||
{
|
||||
//
|
||||
// If legacy behavior is forced via module param,
|
||||
// both legacy and persistent clients are registered and are named
|
||||
// "nv_mem"(legacy) and "nv_mem_nc"(persistent).
|
||||
//
|
||||
strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
|
||||
}
|
||||
else
|
||||
{
|
||||
//
|
||||
// With default persistent behavior, the client name shall be "nv_mem"
|
||||
// so that libraries can use the persistent client under the same name.
|
||||
//
|
||||
strcpy(nv_mem_client_nc.name, DRV_NAME);
|
||||
}
|
||||
|
||||
// The nc client enables support for persistent pages.
|
||||
strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
|
||||
strcpy(nv_mem_client_nc.version, DRV_VERSION);
|
||||
reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
|
||||
if (!reg_handle_nc) {
|
||||
peer_err("nv_mem_client_init -- error while registering nc client\n");
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
|
||||
|
||||
static int nv_mem_param_peerdirect_conf_check(void)
|
||||
{
|
||||
int rc = 0;
|
||||
switch (peerdirect_support) {
|
||||
case NV_MEM_PEERDIRECT_SUPPORT_DEFAULT:
|
||||
case NV_MEM_PEERDIRECT_SUPPORT_LEGACY:
|
||||
break;
|
||||
default:
|
||||
peer_err("invalid peerdirect_support param value %d\n", peerdirect_support);
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int nv_mem_param_persistent_api_conf_check(void)
|
||||
{
|
||||
int rc = 0;
|
||||
switch (persistent_api_support) {
|
||||
case NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT:
|
||||
case NV_MEM_PERSISTENT_API_SUPPORT_LEGACY:
|
||||
break;
|
||||
default:
|
||||
peer_err("invalid persistent_api_support param value %d\n", persistent_api_support);
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __init nv_mem_client_init(void)
|
||||
{
|
||||
#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
|
||||
int rc;
|
||||
rc = nv_mem_param_peerdirect_conf_check();
|
||||
if (rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = nv_mem_param_persistent_api_conf_check();
|
||||
if (rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (persistent_api_support == NV_MEM_PERSISTENT_API_SUPPORT_LEGACY) {
|
||||
rc = nv_mem_legacy_client_init();
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
|
||||
rc = nv_mem_nc_client_init();
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
out:
|
||||
if (rc) {
|
||||
|
||||
329
kernel-open/nvidia-uvm/clc96f.h
Normal file
329
kernel-open/nvidia-uvm/clc96f.h
Normal file
@@ -0,0 +1,329 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef _clc96f_h_
|
||||
#define _clc96f_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class BLACKWELL_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for BLACKWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
* Note there is no .mfs file for this class.
|
||||
*/
|
||||
#define BLACKWELL_CHANNEL_GPFIFO_A (0x0000C96F)
|
||||
|
||||
#define NVC96F_TYPEDEF BLACKWELL_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
/*
 * Layout of the user control area for this channel class. Only GPPut is
 * named; the rest is padding. Offsets in the comments are byte offsets,
 * with each NvU32 entry occupying four bytes.
 */
typedef volatile struct Nvc96fControl_struct {
|
||||
NvU32 Ignored00[0x23]; /* 0000-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored01[0x5c]; /* 0090-01ff*/
|
||||
} Nvc96fControl, BlackwellAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVC96F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVC96F_SET_OBJECT (0x00000000)
|
||||
#define NVC96F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVC96F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVC96F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVC96F_NOP (0x00000008)
|
||||
#define NVC96F_NOP_HANDLE 31:0
|
||||
#define NVC96F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVC96F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVC96F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
|
||||
#define NVC96F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
|
||||
// specifying the page address for a targeted TLB invalidate and the uTLB for
|
||||
// a targeted REPLAY_CANCEL for UVM.
|
||||
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
|
||||
// rearranged fields.
|
||||
#define NVC96F_MEM_OP_A (0x00000028)
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
|
||||
#define NVC96F_MEM_OP_B (0x0000002c)
|
||||
#define NVC96F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
|
||||
#define NVC96F_MEM_OP_C (0x00000030)
|
||||
#define NVC96F_MEM_OP_C_MEMBAR_TYPE 2:0
|
||||
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
|
||||
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
|
||||
#define NVC96F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
|
||||
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
|
||||
#define NVC96F_MEM_OP_D (0x00000034)
|
||||
#define NVC96F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
|
||||
#define NVC96F_MEM_OP_D_OPERATION 31:27
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
|
||||
// CLEAN_LINES is an alias for Tegra/GPU IP usage
|
||||
#define NVC96F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE 0x00000011
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_COH_INVALIDATE 0x00000012
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
|
||||
#define NVC96F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
|
||||
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
|
||||
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
|
||||
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
|
||||
// Semaphore address and payload methods, at consecutive 4-byte steps
// 0x5c..0x68. NOTE(review): parenthesized values are presumably method
// offsets and "hi:lo" values bit ranges for DRF-style macros -- confirm.
#define NVC96F_SEM_ADDR_LO (0x0000005c)
|
||||
// The OFFSET field starts at bit 2, so bits 1:0 are not part of it.
#define NVC96F_SEM_ADDR_LO_OFFSET 31:2
|
||||
#define NVC96F_SEM_ADDR_HI (0x00000060)
|
||||
// Upper address part is 25 bits wide (24:0).
#define NVC96F_SEM_ADDR_HI_OFFSET 24:0
|
||||
#define NVC96F_SEM_PAYLOAD_LO (0x00000064)
|
||||
#define NVC96F_SEM_PAYLOAD_LO_PAYLOAD 31:0
|
||||
#define NVC96F_SEM_PAYLOAD_HI (0x00000068)
|
||||
#define NVC96F_SEM_PAYLOAD_HI_PAYLOAD 31:0
|
||||
// SEM_EXECUTE method (0x6c) and its fields. Field positions defined below:
// OPERATION 2:0, ACQUIRE_SWITCH_TSG 12, ACQUIRE_RECHECK 18, RELEASE_WFI 20,
// PAYLOAD_SIZE 24, RELEASE_TIMESTAMP 25, REDUCTION 30:27, REDUCTION_FORMAT 31.
// Bits not listed here have no field defined in this block.
#define NVC96F_SEM_EXECUTE (0x0000006c)
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION 2:0
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
|
||||
// Single-bit DIS(0)/EN(1) switches.
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK 18:18
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_EN 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_WFI 20:20
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
|
||||
// Selects between the 32BIT (0) and 64BIT (1) payload encodings.
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
|
||||
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
|
||||
// Reduction operator; the field is 4 bits wide but only encodings 0..7 are
// named here.
#define NVC96F_SEM_EXECUTE_REDUCTION 30:27
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_INC 0x00000006
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
|
||||
// WFI method (0x78) with a single-bit SCOPE field.
#define NVC96F_WFI (0x00000078)
|
||||
#define NVC96F_WFI_SCOPE 0:0
|
||||
// CURRENT_SCG_TYPE and CURRENT_VEID are two names for the same encoding (0).
#define NVC96F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVC96F_WFI_SCOPE_CURRENT_VEID 0x00000000
|
||||
#define NVC96F_WFI_SCOPE_ALL 0x00000001
|
||||
// YIELD method (0x80). OP is a 2-bit field; only encodings 0 (NOP) and
// 3 (TSG) are named here.
#define NVC96F_YIELD (0x00000080)
|
||||
#define NVC96F_YIELD_OP 1:0
|
||||
#define NVC96F_YIELD_OP_NOP 0x00000000
|
||||
#define NVC96F_YIELD_OP_TSG 0x00000003
|
||||
// CLEAR_FAULTED method (0x84): a 31-bit HANDLE (30:0) plus a 1-bit TYPE (31)
// selecting PBDMA_FAULTED (0) or ENG_FAULTED (1).
#define NVC96F_CLEAR_FAULTED (0x00000084)
|
||||
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
|
||||
// are intentionally not exposed to the driver through these defines.
|
||||
#define NVC96F_CLEAR_FAULTED_HANDLE 30:0
|
||||
#define NVC96F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC96F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC96F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
|
||||
|
||||
|
||||
/* GPFIFO entry format */
/*
 * ENTRY0_* and ENTRY1_* describe fields of the two words of an entry.
 * NOTE(review): __SIZE is presumably in bytes (two 32-bit words) -- confirm.
 * Some fields overlap (ENTRY0_GET 31:2 vs ENTRY0_OPERAND 31:0, and
 * ENTRY1_GET_HI 7:0 vs ENTRY1_OPCODE 7:0), so they look like alternative
 * interpretations of the same bits; which one applies is not shown here.
 */
|
||||
#define NVC96F_GP_ENTRY__SIZE 8
|
||||
#define NVC96F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVC96F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVC96F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVC96F_GP_ENTRY0_GET 31:2
|
||||
#define NVC96F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC96F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND 24:8
|
||||
#define NVC96F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC96F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC96F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC96F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC96F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVC96F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVC96F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVC96F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
/* Opcode encodings for the ENTRY1_OPCODE field (7:0). */
#define NVC96F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVC96F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVC96F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVC96F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVC96F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
#define NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
|
||||
|
||||
/* dma method formats */
/*
 * Generic field layout of a method header word. SEC_OP (31:29) is the
 * primary opcode; encodings 0 and 2 are named *_USE_TERT and are paired with
 * the GRP0 / GRP2 TERT_OP (17:16) encodings below.
 * The *_OLD fields (ADDRESS_OLD 12:2, COUNT_OLD 28:18) sit alongside the
 * current ADDRESS (11:0) and COUNT (28:16) definitions.
 */
|
||||
#define NVC96F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVC96F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVC96F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVC96F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVC96F_DMA_TERT_OP 17:16
|
||||
#define NVC96F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVC96F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVC96F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVC96F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVC96F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVC96F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVC96F_DMA_METHOD_COUNT 28:16
|
||||
/* IMMD_DATA occupies the same bits (28:16) as METHOD_COUNT. */
#define NVC96F_DMA_IMMD_DATA 28:16
|
||||
#define NVC96F_DMA_SEC_OP 31:29
|
||||
#define NVC96F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVC96F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVC96F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVC96F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVC96F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVC96F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVC96F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
/* OPCODE_VALUE (1) equals NVC96F_DMA_SEC_OP_INC_METHOD; DATA (31:0) spans a
 * full 32-bit word, separate from the header fields. */
|
||||
#define NVC96F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC96F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC96F_DMA_INCR_COUNT 28:16
|
||||
#define NVC96F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC96F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC96F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
/* Same field positions as the incrementing format; OPCODE_VALUE (3) equals
 * NVC96F_DMA_SEC_OP_NON_INC_METHOD. */
|
||||
#define NVC96F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC96F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC96F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC96F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC96F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC96F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
/* Same field positions as the incrementing format; OPCODE_VALUE (5) equals
 * NVC96F_DMA_SEC_OP_ONE_INC. */
|
||||
#define NVC96F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC96F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC96F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC96F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC96F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC96F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
/* A whole-word value (all zero) rather than a bit-range field. */
|
||||
#define NVC96F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
/* The data lives in bits 28:16 of the header itself. OPCODE_VALUE (4)
 * equals NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD. */
|
||||
#define NVC96F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC96F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
/* Identical redefinition of NVC96F_DMA_IMMD_DATA from the generic
 * "dma method formats" group; allowed because the replacement text matches. */
#define NVC96F_DMA_IMMD_DATA 28:16
|
||||
#define NVC96F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC96F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
/* OPCODE is the 16-bit field 31:16. Value 0x0001 there puts 1 in bits 17:16
 * and 0 in bits 31:29, i.e. TERT_OP_GRP0_SET_SUB_DEV_MASK under
 * SEC_OP_GRP0_USE_TERT. */
|
||||
#define NVC96F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
/* OPCODE is the 16-bit field 31:16. Value 0x0002 there puts 2 in bits 17:16
 * and 0 in bits 31:29, i.e. TERT_OP_GRP0_STORE_SUB_DEV_MASK under
 * SEC_OP_GRP0_USE_TERT. */
|
||||
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
/* Only an opcode is defined, with no VALUE field. 0x0003 in bits 31:16
 * corresponds to TERT_OP_GRP0_USE_SUB_DEV_MASK under SEC_OP_GRP0_USE_TERT. */
|
||||
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
/* OPCODE_VALUE (7) equals NVC96F_DMA_SEC_OP_END_PB_SEGMENT. */
|
||||
#define NVC96F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVC96F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
/* ADDRESS (12:2) and COUNT (28:18) use the same bit positions as
 * NVC96F_DMA_METHOD_ADDRESS_OLD and NVC96F_DMA_METHOD_COUNT_OLD.
 * OPCODE encodings are METHOD = 0 and NONINC_METHOD = 2. */
|
||||
#define NVC96F_DMA_ADDRESS 12:2
|
||||
#define NVC96F_DMA_SUBCH 15:13
|
||||
#define NVC96F_DMA_OPCODE3 17:16
|
||||
#define NVC96F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVC96F_DMA_COUNT 28:18
|
||||
#define NVC96F_DMA_OPCODE 31:29
|
||||
#define NVC96F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVC96F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVC96F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clc96f_h_ */
|
||||
460
kernel-open/nvidia-uvm/clc9b5.h
Normal file
460
kernel-open/nvidia-uvm/clc9b5.h
Normal file
@@ -0,0 +1,460 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 1993-2004 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clc9b5_h_
|
||||
#define _clc9b5_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Class identifier for this header; its low 16 bits (C9B5) match the NVC9B5_
 * prefix used by the method and field macros in this file. */
#define BLACKWELL_DMA_COPY_A (0x0000C9B5)
|
||||
|
||||
/*
 * Method layout of the BLACKWELL_DMA_COPY_A class as a volatile struct.
 *
 * Each named member is one NvV32 whose byte range is given in its trailing
 * comment; those offsets equal the values of the corresponding NVC9B5_*
 * method macros in this file. The ReservedNN arrays are padding only: with a
 * 4-byte NvV32 (declared in nvtypes.h, not visible here) their element counts
 * place every named member at its commented offset and bring the total size
 * to 0x2000 bytes. Do not reorder or resize members.
 */
typedef volatile struct _clc9b5_tag0 {
|
||||
NvV32 Reserved00[0x40];
|
||||
NvV32 Nop; // 0x00000100 - 0x00000103
|
||||
NvV32 Reserved01[0xF];
|
||||
NvV32 PmTrigger; // 0x00000140 - 0x00000143
|
||||
NvV32 Reserved02[0x36];
|
||||
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
|
||||
NvV32 Reserved03[0x6];
|
||||
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
|
||||
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
|
||||
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
|
||||
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
|
||||
NvV32 Reserved04[0x1];
|
||||
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
|
||||
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
|
||||
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
|
||||
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
|
||||
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
|
||||
NvV32 Reserved05[0x26];
|
||||
NvV32 LaunchDma; // 0x00000300 - 0x00000303
|
||||
NvV32 Reserved06[0x3F];
|
||||
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
|
||||
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
|
||||
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
|
||||
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
|
||||
NvV32 PitchIn; // 0x00000410 - 0x00000413
|
||||
NvV32 PitchOut; // 0x00000414 - 0x00000417
|
||||
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
|
||||
NvV32 LineCount; // 0x0000041C - 0x0000041F
|
||||
NvV32 Reserved07[0x38];
|
||||
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
|
||||
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
|
||||
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
|
||||
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
|
||||
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
|
||||
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
|
||||
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
|
||||
NvV32 Reserved08[0x5];
|
||||
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
|
||||
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
|
||||
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
|
||||
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
|
||||
NvV32 Reserved09[0x10];
|
||||
NvV32 SetCompressionParameters; // 0x00000580 - 0x00000583
|
||||
NvV32 SetDecompressOutLength; // 0x00000584 - 0x00000587
|
||||
NvV32 SetDecompressOutLengthAddrUpper; // 0x00000588 - 0x0000058B
|
||||
NvV32 SetDecompressOutLengthAddrLower; // 0x0000058C - 0x0000058F
|
||||
NvV32 SetDecompressChecksum; // 0x00000590 - 0x00000593
|
||||
NvV32 Reserved10[0x5A];
|
||||
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
|
||||
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
|
||||
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
|
||||
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
|
||||
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
|
||||
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
|
||||
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
|
||||
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
|
||||
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
|
||||
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
|
||||
NvV32 Reserved11[0x1];
|
||||
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
|
||||
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
|
||||
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
|
||||
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
|
||||
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
|
||||
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
|
||||
NvV32 Reserved12[0x1];
|
||||
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
|
||||
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
|
||||
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
|
||||
NvV32 DstOriginY; // 0x00000750 - 0x00000753
|
||||
NvV32 Reserved13[0x270];
|
||||
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
|
||||
NvV32 Reserved14[0x3BA];
|
||||
} blackwell_dma_copy_aControlPio;
|
||||
|
||||
// Method offsets for Nop (0x100) and PmTrigger (0x140); they match the
// offsets of those members in the _clc9b5_tag0 layout struct. Each has a
// single field covering all 32 bits.
#define NVC9B5_NOP (0x00000100)
|
||||
#define NVC9B5_NOP_PARAMETER 31:0
|
||||
#define NVC9B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC9B5_PM_TRIGGER_V 31:0
|
||||
// Monitored-fence methods: a 1-bit TYPE selector, then the signal address
// base split into a 25-bit UPPER part (24:0) and a 32-bit LOWER part.
#define NVC9B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
|
||||
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
|
||||
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
|
||||
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
|
||||
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
|
||||
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
|
||||
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
|
||||
// Semaphore methods. SET_SEMAPHORE_A carries the UPPER field (24:0) and
// SET_SEMAPHORE_B the LOWER field (31:0).
// NOTE(review): presumably the two halves of the semaphore address --
// confirm. PAYLOAD and PAYLOAD_UPPER are each a full 32-bit field.
#define NVC9B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC9B5_SET_SEMAPHORE_A_UPPER 24:0
|
||||
#define NVC9B5_SET_SEMAPHORE_B (0x00000244)
|
||||
#define NVC9B5_SET_SEMAPHORE_B_LOWER 31:0
|
||||
#define NVC9B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
|
||||
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
|
||||
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
|
||||
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
|
||||
// Render-enable methods. A and B carry UPPER (24:0) and LOWER (31:0) fields;
// C carries a 3-bit MODE with five named encodings (0..4).
#define NVC9B5_SET_RENDER_ENABLE_A (0x00000254)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_A_UPPER 24:0
|
||||
#define NVC9B5_SET_RENDER_ENABLE_B (0x00000258)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_B_LOWER 31:0
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C (0x0000025C)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C_MODE 2:0
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
|
||||
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
|
||||
// Source physical-mode method: TARGET (1:0) with four named apertures,
// BASIC_KIND (5:2), PEER_ID (8:6) and FLA (9:9). The field layout mirrors
// NVC9B5_SET_DST_PHYS_MODE.
#define NVC9B5_SET_SRC_PHYS_MODE (0x00000260)
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET 1:0
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC9B5_SET_SRC_PHYS_MODE_FLA 9:9
|
||||
// Destination physical-mode method: TARGET (1:0) with four named apertures,
// BASIC_KIND (5:2), PEER_ID (8:6) and FLA (9:9). The field layout mirrors
// NVC9B5_SET_SRC_PHYS_MODE.
#define NVC9B5_SET_DST_PHYS_MODE (0x00000264)
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_TARGET 1:0
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC9B5_SET_DST_PHYS_MODE_FLA 9:9
|
||||
// LAUNCH_DMA method (0x300). The fields below tile bits 31:0 exactly once.
// They are mostly listed in ascending bit order; FLUSH_TYPE (25:25) is the
// exception, listed next to FLUSH_ENABLE.
#define NVC9B5_LAUNCH_DMA (0x00000300)
|
||||
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
|
||||
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
// SEMAPHORE_TYPE has two names for each of encodings 1 and 2:
// RELEASE_SEMAPHORE_NO_TIMESTAMP / RELEASE_ONE_WORD_SEMAPHORE (1) and
// RELEASE_SEMAPHORE_WITH_TIMESTAMP / RELEASE_FOUR_WORD_SEMAPHORE (2).
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
|
||||
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
|
||||
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
|
||||
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
|
||||
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
|
||||
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
|
||||
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE 10:10
|
||||
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE 11:11
|
||||
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SRC_TYPE 12:12
|
||||
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_DST_TYPE 13:13
|
||||
#define NVC9B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
|
||||
// All 16 encodings of the 4-bit reduction field are named; 0x8, 0x9, 0xD,
// 0xE and 0xF carry INVALID* names.
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
|
||||
// PROT2PROT and DEFAULT are two names for encoding 0.
#define NVC9B5_LAUNCH_DMA_COPY_TYPE 21:20
|
||||
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
|
||||
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
|
||||
#define NVC9B5_LAUNCH_DMA_VPRMODE 22:22
|
||||
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
|
||||
// RESERVED_* fields have a position but no named encodings.
#define NVC9B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
// Copy geometry methods at 0x400..0x41C. The in/out offsets are split into
// a 25-bit UPPER field (24:0) and a 32-bit LOWER method; pitch, line length
// and line count are each a full 32-bit VALUE field.
#define NVC9B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC9B5_OFFSET_IN_UPPER_UPPER 24:0
|
||||
#define NVC9B5_OFFSET_IN_LOWER (0x00000404)
|
||||
#define NVC9B5_OFFSET_IN_LOWER_VALUE 31:0
|
||||
#define NVC9B5_OFFSET_OUT_UPPER (0x00000408)
|
||||
#define NVC9B5_OFFSET_OUT_UPPER_UPPER 24:0
|
||||
#define NVC9B5_OFFSET_OUT_LOWER (0x0000040C)
|
||||
#define NVC9B5_OFFSET_OUT_LOWER_VALUE 31:0
|
||||
#define NVC9B5_PITCH_IN (0x00000410)
|
||||
#define NVC9B5_PITCH_IN_VALUE 31:0
|
||||
#define NVC9B5_PITCH_OUT (0x00000414)
|
||||
#define NVC9B5_PITCH_OUT_VALUE 31:0
|
||||
#define NVC9B5_LINE_LENGTH_IN (0x00000418)
|
||||
#define NVC9B5_LINE_LENGTH_IN_VALUE 31:0
|
||||
#define NVC9B5_LINE_COUNT (0x0000041C)
|
||||
#define NVC9B5_LINE_COUNT_VALUE 31:0
|
||||
// SET_SECURE_COPY_MODE method (0x500). MODE (0:0) selects ENCRYPT (0) or
// DECRYPT (1). The remaining fields here occupy bits 31:19 and all carry a
// RESERVED_ prefix; no fields are defined for bits 18:1 in this block.
#define NVC9B5_SET_SECURE_COPY_MODE (0x00000500)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE 0:0
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
|
||||
// Decrypt/encrypt parameter methods at 0x504..0x53C. IV0..IV2 and the
// reserved AES counter are full 32-bit values. Each *_ADDR_UPPER/_LOWER pair
// splits an address into a 25-bit upper field (24:0) and a 32-bit lower one.
#define NVC9B5_SET_DECRYPT_IV0 (0x00000504)
|
||||
#define NVC9B5_SET_DECRYPT_IV0_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_IV1 (0x00000508)
|
||||
#define NVC9B5_SET_DECRYPT_IV1_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_IV2 (0x0000050C)
|
||||
#define NVC9B5_SET_DECRYPT_IV2_VALUE 31:0
|
||||
#define NVC9B5_RESERVED_SET_AESCOUNTER (0x00000510)
|
||||
#define NVC9B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
|
||||
// Compression methods at 0x580..0x590. SET_COMPRESSION_PARAMETERS packs
// OPERATION (0:0), ALGO (3:1) and CHECK_SUM (29:28); the ALGO field is 3
// bits wide with encodings 0..5 named.
#define NVC9B5_SET_COMPRESSION_PARAMETERS (0x00000580)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION 0:0
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_DECOMPRESS (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_COMPRESS (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO 3:1
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_DATA_ONLY (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK (0x00000002)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK_CHECKSUM (0x00000003)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_DEFLATE (0x00000004)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY_WITH_LONG_FETCH (0x00000005)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM 29:28
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_NONE (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_ADLER32 (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_CRC32 (0x00000002)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_SNAPPY_CRC (0x00000003)
|
||||
// Decompress output length, the address for it (25-bit upper + 32-bit
// lower fields), and the checksum method.
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH (0x00000584)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_V 31:0
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER (0x00000588)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER (0x0000058C)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_DECOMPRESS_CHECKSUM (0x00000590)
|
||||
#define NVC9B5_SET_DECOMPRESS_CHECKSUM_V 31:0
|
||||
// SET_MEMORY_SCRUB_PARAMETERS method (0x6FC): only a single-bit DISCARDABLE
// flag (FALSE = 0, TRUE = 1) is defined.
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
|
||||
// Remap constants A (0x700) and B (0x704), each a full 32-bit value. They
// correspond by name to the CONST_A / CONST_B selector encodings of
// NVC9B5_SET_REMAP_COMPONENTS_DST_*.
#define NVC9B5_SET_REMAP_CONST_A (0x00000700)
|
||||
#define NVC9B5_SET_REMAP_CONST_A_V 31:0
|
||||
#define NVC9B5_SET_REMAP_CONST_B (0x00000704)
|
||||
#define NVC9B5_SET_REMAP_CONST_B_V 31:0
|
||||
// SET_REMAP_COMPONENTS method (0x708). DST_X/Y/Z/W are 3-bit selectors at
// 2:0, 6:4, 10:8 and 14:12, and all four share the same seven encodings
// (SRC_X..SRC_W = 0..3, CONST_A = 4, CONST_B = 5, NO_WRITE = 6).
// COMPONENT_SIZE (17:16), NUM_SRC_COMPONENTS (21:20) and NUM_DST_COMPONENTS
// (25:24) encode ONE..FOUR as 0..3, i.e. the named count minus one.
#define NVC9B5_SET_REMAP_COMPONENTS (0x00000708)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X 2:0
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y 6:4
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z 10:8
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W 14:12
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC9B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC9B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC9B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC9B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC9B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC9B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC9B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC9B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC9B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC9B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC9B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC9B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC9B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC9B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC9B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC9B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC9B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC9B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC9B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC9B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC9B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC9B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC9B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC9B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC9B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC9B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC9B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC9B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC9B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC9B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC9B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC9B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clc9b5_h
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100 (0x00000170)
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100 (0x00000180)
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100 (0x00000190)
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 (0x000001A0)
|
||||
|
||||
/* valid ARCHITECTURE_GP10x implementation values */
|
||||
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)
|
||||
|
||||
546
kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_fault.h
Normal file
546
kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_fault.h
Normal file
@@ -0,0 +1,546 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2003-2016 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef __gb100_dev_fault_h__
|
||||
#define __gb100_dev_fault_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_PFAULT /* ----G */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 384 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_IFB 55 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_FLA 4 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1 256 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2 320 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_SEC 6 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_FSP 7 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF 10 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF0 10 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF1 11 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF2 12 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF3 13 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF4 14 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF5 15 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF6 16 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF7 17 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF8 18 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PERF9 19 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GSPLITE 20 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC 28 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 28 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 29 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 30 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC3 31 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC4 32 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC5 33 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC6 34 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVDEC7 35 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 36 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG1 37 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG2 38 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG3 39 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG4 40 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG5 41 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG6 42 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVJPG7 43 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 65 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE0 65 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE1 66 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE2 67 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE3 68 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE4 69 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE5 70 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE6 71 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE7 72 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE8 73 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE9 74 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE10 75 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE11 76 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE12 77 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE13 78 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE14 79 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE15 80 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE16 81 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE17 82 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE18 83 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_CE19 84 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 5 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC0 44 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC1 45 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC2 46 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_NVENC3 47 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_OFA0 48 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 56 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST0 85 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST1 86 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST2 87 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST3 88 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST4 89 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST5 90 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST6 91 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST7 92 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST8 93 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST9 94 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST10 95 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST11 96 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST12 97 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST13 98 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST14 99 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST15 100 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST16 101 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST17 102 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST18 103 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST19 104 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST20 105 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST21 106 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST22 107 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST23 108 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST24 109 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST25 110 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST26 111 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST27 112 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST28 113 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST29 114 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST30 115 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST31 116 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST32 117 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST33 118 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST34 119 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST35 120 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST36 121 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST37 122 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST38 123 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST39 124 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST40 125 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST41 126 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST42 127 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST43 128 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_HOST44 129 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 256 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 257 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 258 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 259 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 260 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 261 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 262 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 263 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 264 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 265 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 266 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 267 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 268 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 269 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 270 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 271 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 272 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 273 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 274 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 275 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 276 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 277 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 278 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 279 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 280 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 281 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 282 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 283 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 284 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 285 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 286 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 287 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 288 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 289 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 290 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 291 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 292 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 293 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 294 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 295 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 296 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 297 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 298 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 299 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 300 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 301 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 302 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 303 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 304 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 305 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 306 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 307 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 308 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 309 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 310 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 311 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 312 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 313 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 314 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 315 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 316 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 317 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 318 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 319 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 320 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 321 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 322 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 323 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 324 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 325 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 326 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 327 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 328 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 329 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 330 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 331 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 332 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 333 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 334 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 335 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 336 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 337 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 338 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 339 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 340 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 341 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 342 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 343 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 344 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 345 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 346 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 347 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 348 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 349 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 350 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 351 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 352 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 353 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 354 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 355 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 356 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 357 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 358 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 359 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 360 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 361 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 362 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 363 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 364 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 365 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 366 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 367 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 368 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 369 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 370 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 371 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 372 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 373 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 374 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 375 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 376 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 377 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 378 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 379 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 380 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 381 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 382 /* */
|
||||
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 383 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_CC_VIOLATION 0x0000000b /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
|
||||
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
|
||||
#define NV_PFAULT_CLIENT 14:8 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_0 0x00000070 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_1 0x00000071 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_2 0x00000072 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_ROP_3 0x00000073 /* */
|
||||
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DISPNISO 0x00000003 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE0 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS0 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE3 0x0000000C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ACTRS 0x0000000E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD0 0x00000010 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PERF0 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD0 0x00000013 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC0 0x00000014 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB0 0x00000015 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC0 0x00000017 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED0 0x0000001D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD1 0x0000001E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC1 0x00000028 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC2 0x00000029 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SSYNC3 0x00000036 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_CE_SHIM0 0x00000038 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC3 0x0000003D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC4 0x0000003E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_OFA0 0x0000003F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC1 0x00000040 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB1 0x00000041 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC2 0x00000042 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB2 0x00000043 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC3 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB3 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD1 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD2 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD3 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSPLITE1 0x00000049 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSPLITE2 0x0000004A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSPLITE3 0x0000004B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD2 0x0000004C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PD3 0x0000004D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE1 0x0000004E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE2 0x0000004F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE3 0x00000050 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE4 0x00000051 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE5 0x00000052 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE6 0x00000053 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FE7 0x00000054 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS1 0x00000055 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS2 0x00000056 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS3 0x00000057 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS4 0x00000058 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS5 0x00000059 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS6 0x0000005A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FECS7 0x0000005B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED1 0x0000005C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED2 0x0000005D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED3 0x0000005E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED4 0x0000005F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED5 0x00000060 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED6 0x00000061 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SKED7 0x00000062 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC 0x00000063 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC0 0x00000063 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC1 0x00000064 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC2 0x00000065 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC3 0x00000066 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC4 0x00000067 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC5 0x00000068 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC6 0x00000069 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC7 0x0000006a /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC8 0x0000006b /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC9 0x0000006c /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC10 0x0000006d /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_ESC11 0x0000006e /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC5 0x0000006F /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC6 0x00000070 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVDEC7 0x00000071 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG1 0x00000072 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG2 0x00000073 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG3 0x00000074 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG4 0x00000075 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG5 0x00000076 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG6 0x00000077 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVJPG7 0x00000078 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_FSP 0x00000079 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_BSI 0x0000007A /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSPLITE 0x0000007B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSPLITE0 0x0000007B /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER2 0x0000007C /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER3 0x0000007D /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER4 0x0000007E /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_NVENC3 0x0000007F /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
|
||||
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
|
||||
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
|
||||
#define NV_PFAULT_GPC_ID 28:24 /* */
|
||||
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
|
||||
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
|
||||
#define NV_PFAULT_VALID 31:31 /* */
|
||||
#endif // __gb100_dev_fault_h__
|
||||
560
kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_mmu.h
Normal file
560
kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_mmu.h
Normal file
@@ -0,0 +1,560 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2003-2016 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef __gb100_dev_mmu_h__
|
||||
#define __gb100_dev_mmu_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_MMU_PDE /* ----G */
|
||||
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PDE__SIZE 8
|
||||
#define NV_MMU_PTE /* ----G */
|
||||
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PTE__SIZE 8
|
||||
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
|
||||
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
|
||||
#define NV_MMU_VER1_PDE /* ----G */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PDE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE /* ----G */
|
||||
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PTE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_NEW_PDE /* ----G */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PDE__SIZE 8
|
||||
#define NV_MMU_NEW_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_NEW_PTE /* ----G */
|
||||
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PTE__SIZE 8
|
||||
#define NV_MMU_VER2_PDE /* ----G */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PDE__SIZE 8
|
||||
#define NV_MMU_VER2_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER2_PTE /* ----G */
|
||||
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PTE__SIZE 8
|
||||
#define NV_MMU_VER3_PDE /* ----G */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PDE__SIZE 8
|
||||
#define NV_MMU_VER3_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER3_PTE /* ----G */
|
||||
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PTE__SIZE 8
|
||||
#define NV_MMU_CLIENT /* ----G */
|
||||
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
|
||||
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
|
||||
#endif // __gb100_dev_mmu_h__
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
{
|
||||
|
||||
unsigned i, j;
|
||||
const static unsigned attempts = 3;
|
||||
static const unsigned attempts = 3;
|
||||
struct task_struct *thread[3];
|
||||
|
||||
for (i = 0;; i++) {
|
||||
|
||||
@@ -6,6 +6,10 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
@@ -72,6 +76,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
|
||||
@@ -114,6 +114,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
Copyright (c) 2013-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -58,7 +58,7 @@
|
||||
#ifndef _UVM_H_
|
||||
#define _UVM_H_
|
||||
|
||||
#define UVM_API_LATEST_REVISION 11
|
||||
#define UVM_API_LATEST_REVISION 12
|
||||
|
||||
#if !defined(UVM_API_REVISION)
|
||||
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
|
||||
@@ -167,7 +167,7 @@ NV_STATUS UvmSetDriverVersion(NvU32 major, NvU32 changelist);
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The Linux kernel is not able to support UVM. This could be because
|
||||
// The kernel is not able to support UVM. This could be because
|
||||
// the kernel is too old, or because it lacks a feature that UVM
|
||||
// requires. The kernel log will have details.
|
||||
//
|
||||
@@ -1448,7 +1448,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
// the CPU.
|
||||
// the CPU. -1 indicates no preference, in which case the pages used
|
||||
// can be on any of the available CPU NUMA nodes. If NUMA is disabled
|
||||
// only 0 and -1 are allowed.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@@ -1462,6 +1464,11 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
// The VA range exceeds the largest virtual address supported by the
|
||||
// destination processor.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// preferredCpuMemoryNode is not a valid CPU NUMA node or it corresponds
|
||||
// to a NUMA node ID for a registered GPU. If NUMA is disabled, it
|
||||
// indicates that preferredCpuMemoryNode was neither 0 nor -1.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// destinationUuid does not represent a valid processor such as a CPU or
|
||||
// a GPU with a GPU VA space registered for it. Or destinationUuid is a
|
||||
@@ -1528,8 +1535,9 @@ NV_STATUS UvmMigrate(void *base,
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
// the CPU. This argument is ignored if the given virtual address range
|
||||
// corresponds to managed memory.
|
||||
// the CPU. -1 indicates no preference, in which case the pages used
|
||||
// can be on any of the available CPU NUMA nodes. If NUMA is disabled
|
||||
// only 0 and -1 are allowed.
|
||||
//
|
||||
// semaphoreAddress: (INPUT)
|
||||
// Base address of the semaphore.
|
||||
@@ -1586,8 +1594,8 @@ NV_STATUS UvmMigrateAsync(void *base,
|
||||
//
|
||||
// Migrates the backing of all virtual address ranges associated with the given
|
||||
// range group to the specified destination processor. The behavior of this API
|
||||
// is equivalent to calling UvmMigrate on each VA range associated with this
|
||||
// range group.
|
||||
// is equivalent to calling UvmMigrate with preferredCpuMemoryNode = -1 on each
|
||||
// VA range associated with this range group.
|
||||
//
|
||||
// Any errors encountered during migration are returned immediately. No attempt
|
||||
// is made to migrate the remaining unmigrated ranges and the ranges that are
|
||||
@@ -2169,7 +2177,8 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
|
||||
//
|
||||
// If any page in the VA range has a preferred location, then the migration and
|
||||
// mapping policies associated with this API take precedence over those related
|
||||
// to the preferred location.
|
||||
// to the preferred location. If the preferred location is a specific CPU NUMA
|
||||
// node, that NUMA node will be used for a CPU-resident copy of the page.
|
||||
//
|
||||
// If any pages in this VA range have any processors present in their
|
||||
// accessed-by list, the migration and mapping policies associated with this
|
||||
@@ -2300,7 +2309,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// UvmPreventMigrationRangeGroups has not been called on the range group that
|
||||
// those pages are associated with, then the migration and mapping policies
|
||||
// associated with UvmEnableReadDuplication override the policies outlined
|
||||
// above. Note that enabling read duplication on on any pages in this VA range
|
||||
// above. Note that enabling read duplication on any pages in this VA range
|
||||
// does not clear the state set by this API for those pages. It merely overrides
|
||||
// the policies associated with this state until read duplication is disabled
|
||||
// for those pages.
|
||||
@@ -2333,7 +2342,8 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
|
||||
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
|
||||
// allowed by the global and thread memory policies.
|
||||
// allowed by the global and thread memory policies. If NUMA is disabled
|
||||
// only 0 and -1 are allowed.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@@ -3463,8 +3473,7 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
|
||||
//
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
|
||||
// sizeof(UvmToolsEventControlData_V2).
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
|
||||
NvLength UvmToolsGetEventControlSize(void);
|
||||
|
||||
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
|
||||
@@ -3487,9 +3496,7 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for events or counters.
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
// UvmToolsEventControlData_V2::version records the entry version that
|
||||
// will be generated.
|
||||
// See UvmToolsEventQueueVersion.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
@@ -3502,8 +3509,7 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
//
|
||||
// event_control (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
|
||||
// UvmToolsEventControlData_V2 (although single page-size allocation
|
||||
// hold UvmToolsEventControlData (although single page-size allocation
|
||||
// should be more than enough). Gets pinned until queue is destroyed.
|
||||
//
|
||||
// queue: (OUTPUT)
|
||||
@@ -3514,10 +3520,16 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
|
||||
// The version is not UvmToolsEventQueueVersion_V1 or
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The requested version queue could not be created
|
||||
// (i.e., the UVM kernel driver is older and doesn't support
|
||||
// UvmToolsEventQueueVersion_V2).
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES:
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate a queue of requested size. Another
|
||||
@@ -3970,57 +3982,51 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
// version: (INPUT)
|
||||
// Requested version for the UUID table returned. The version must
|
||||
// match the requested version of the event queue created with
|
||||
// UvmToolsCreateEventQueue().
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
// UvmToolsCreateEventQueue(). See UvmToolsEventQueueVersion.
|
||||
// If the version of the event queue does not match the version of the
|
||||
// UUID table, the behavior is undefined.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The srcIndex and dstIndex fields of the
|
||||
// UvmEventMigrationInfo struct index this array. Unused indices will
|
||||
// have a UUID of zero. Version UvmEventEntry_V1 only uses GPU UUIDs
|
||||
// for the UUID of the physical GPU and only supports a single SMC
|
||||
// partition registered per process. Version UvmEventEntry_V2 supports
|
||||
// multiple SMC partitions registered per process and uses physical GPU
|
||||
// UUIDs if the GPU is not SMC capable or SMC enabled and GPU instance
|
||||
// UUIDs for SMC partitions.
|
||||
// The table pointer can be NULL in which case, the size of the table
|
||||
// needed to hold all the UUIDs is returned in 'count'.
|
||||
//
|
||||
// table_size: (INPUT)
|
||||
// The size of the table in number of array elements. This can be
|
||||
// zero if the table pointer is NULL.
|
||||
//
|
||||
// count: (OUTPUT)
|
||||
// On output, it is set by UVM to the number of UUIDs needed to hold
|
||||
// all the UUIDs, including any gaps in the table due to unregistered
|
||||
// GPUs.
|
||||
// at index zero. The number of elements in the array must be greater than
|
||||
// or equal to UVM_MAX_PROCESSORS_V1 if the version is
|
||||
// UvmToolsEventQueueVersion_V1 and UVM_MAX_PROCESSORS if the version is
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
|
||||
// index this array. Unused indices will have a UUID of zero.
|
||||
// If version is UvmToolsEventQueueVersion_V1 then the reported UUID
|
||||
// will be that of the corresponding physical GPU, even if multiple SMC
|
||||
// partitions are registered under that physical GPU. If version is
|
||||
// UvmToolsEventQueueVersion_V2 then the reported UUID will be the GPU
|
||||
// instance UUID if SMC is enabled, otherwise it will be the UUID of
|
||||
// the physical GPU.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed or the count pointer was invalid.
|
||||
// writing to table failed.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
|
||||
// The count pointer is NULL.
|
||||
// See UvmToolsEventQueueVersion.
|
||||
// The version is not UvmToolsEventQueueVersion_V1 or
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
//
|
||||
// NV_WARN_MISMATCHED_TARGET:
|
||||
// The kernel returned a table suitable for UvmEventEntry_V1 events.
|
||||
// (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The kernel is not able to support the requested version
|
||||
// (i.e., the UVM kernel driver is older and doesn't support
|
||||
// UvmToolsEventQueueVersion_V2).
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
NvProcessorUuid *table,
|
||||
NvLength *count);
|
||||
#else
|
||||
#if UVM_API_REV_IS_AT_MOST(11)
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table,
|
||||
NvLength table_size,
|
||||
NvLength *count);
|
||||
#else
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
75
kernel-open/nvidia-uvm/uvm_ampere_fault_buffer.c
Normal file
75
kernel-open/nvidia-uvm/uvm_ampere_fault_buffer.c
Normal file
@@ -0,0 +1,75 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "hwref/ampere/ga100/dev_fault.h"
|
||||
|
||||
// Reports whether a HUB fault client ID is one of the CE clients. The accepted
// IDs are the two HSCE ranges (HSCE0..HSCE9 and HSCE10..HSCE15, which are
// tested as separate ranges) and the individual CE0, CE1 and CE2 clients.
// Every other ID yields false.
static bool client_id_ce(NvU16 client_id)
{
    const bool hsce_low = client_id >= NV_PFAULT_CLIENT_HUB_HSCE0 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE9;
    const bool hsce_high = client_id >= NV_PFAULT_CLIENT_HUB_HSCE10 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE15;
    const bool plain_ce = client_id == NV_PFAULT_CLIENT_HUB_CE0 ||
                          client_id == NV_PFAULT_CLIENT_HUB_CE1 ||
                          client_id == NV_PFAULT_CLIENT_HUB_CE2;

    return hsce_low || hsce_high || plain_ce;
}
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_ampere_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id)
|
||||
{
|
||||
// Servicing CE and Host (HUB clients) faults.
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
|
||||
if (client_id_ce(client_id)) {
|
||||
UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_CE;
|
||||
}
|
||||
|
||||
if (client_id == NV_PFAULT_CLIENT_HUB_HOST || client_id == NV_PFAULT_CLIENT_HUB_ESC) {
|
||||
UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST31);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_HOST;
|
||||
}
|
||||
}
|
||||
|
||||
// We shouldn't be servicing faults from any other engines other than GR.
|
||||
UVM_ASSERT_MSG(client_id <= NV_PFAULT_CLIENT_GPC_ROP_3, "Unexpected client ID: 0x%x\n", client_id);
|
||||
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS && mmu_engine_id < NV_PFAULT_MMU_ENG_ID_BAR1,
|
||||
"Unexpected engine ID: 0x%x\n",
|
||||
mmu_engine_id);
|
||||
UVM_ASSERT(client_type == UVM_FAULT_CLIENT_TYPE_GPC);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -205,17 +205,18 @@ void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
|
||||
CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
|
||||
}
|
||||
|
||||
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
@@ -230,8 +231,8 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE3 is the highest level on Pascal, see the comment in uvm_pascal_mmu.c
|
||||
// for details.
|
||||
// PDE3 is the highest level on Pascal-Ampere, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
|
||||
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
|
||||
|
||||
@@ -242,7 +243,12 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
@@ -255,16 +261,18 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
// Copy from Volta, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
@@ -272,6 +280,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
@@ -281,9 +290,9 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
@@ -325,7 +334,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE3 is the highest level on Pascal-Ampere , see the comment in
|
||||
// PDE3 is the highest level on Pascal-Ampere, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
|
||||
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
|
||||
@@ -337,10 +346,15 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
sysmembar_value |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
@@ -352,21 +366,23 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
uvm_membar_t membar;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
@@ -381,7 +397,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
|
||||
// PDE3 is the highest level on Pascal, see the comment in
|
||||
// PDE3 is the highest level on Pascal-Ampere, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
|
||||
}
|
||||
@@ -393,6 +409,11 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
@@ -403,9 +424,9 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
|
||||
NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
@@ -418,7 +439,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
@@ -432,12 +453,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
membar = UVM_MEMBAR_SYS;
|
||||
else if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
membar = UVM_MEMBAR_GPU;
|
||||
else
|
||||
membar = UVM_MEMBAR_NONE;
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -36,22 +36,7 @@
|
||||
#include "uvm_ampere_fault_buffer.h"
|
||||
#include "hwref/ampere/ga100/dev_fault.h"
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
|
||||
{
|
||||
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST31)
|
||||
return UVM_MMU_ENGINE_TYPE_HOST;
|
||||
|
||||
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
|
||||
return UVM_MMU_ENGINE_TYPE_CE;
|
||||
|
||||
// We shouldn't be servicing faults from any other engines
|
||||
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS && mmu_engine_id < NV_PFAULT_MMU_ENG_ID_BAR1,
|
||||
"Unexpected engine ID: 0x%x\n", mmu_engine_id);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_ampere(NvU32 page_size)
|
||||
static NvU32 page_table_depth_ampere(NvU64 page_size)
|
||||
{
|
||||
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
|
||||
if (page_size == UVM_PAGE_SIZE_2M)
|
||||
@@ -62,14 +47,14 @@ static NvU32 page_table_depth_ampere(NvU32 page_size)
|
||||
return 4;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_ampere(void)
|
||||
static NvU64 page_sizes_ampere(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size)
|
||||
{
|
||||
static bool initialized = false;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -29,10 +29,9 @@
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
105
kernel-open/nvidia-uvm/uvm_blackwell.c
Normal file
105
kernel-open/nvidia-uvm/uvm_blackwell.c
Normal file
@@ -0,0 +1,105 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2022-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_blackwell_fault_buffer.h"
|
||||
|
||||
// Populate the architecture-level properties of a Blackwell parent GPU:
// TLB-invalidate batching limits, uTLB counts, the RM/UVM virtual address
// layout, addressing limits and the set of supported features.
void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
    // Only used as a sizeof() operand below, never dereferenced.
    uvm_fault_buffer_entry_t *dummy;

    // TLB invalidate batching.
    parent_gpu->tlb_batch.va_invalidate_supported = true;
    parent_gpu->tlb_batch.va_range_invalidate_supported = true;

    // TODO: Bug 1767241: Run benchmarks to figure out a good number
    parent_gpu->tlb_batch.max_ranges = 8;

    // uTLB accounting. The total must be representable in the utlb_id field
    // of a fault buffer entry.
    parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
                                                                       (sizeof(dummy->fault_source.utlb_id) * 8)));

    // A single top level PDE on Blackwell covers 64 PB and that's the minimum
    // size that can be used.
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;

    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    // See uvm_mmu.h for mapping placement
    parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);

    // TODO: Bug 3953852: Set this to true pending Blackwell changes
    parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

    // All GR context buffers may be mapped to 57b wide VAs. All "compute" units
    // accessing GR context buffers support the 57-bit VA range.
    parent_gpu->max_channel_va = 1ull << 57;
    parent_gpu->max_host_va = 1ull << 57;

    // Blackwell can map sysmem with any page size
    parent_gpu->can_map_sysmem_with_large_pages = true;

    // Prefetch instructions will generate faults
    parent_gpu->prefetch_fault_supported = true;

    // Blackwell can place GPFIFO in vidmem
    parent_gpu->gpfifo_in_vidmem_supported = true;

    // Fault and access counter handling.
    parent_gpu->replayable_faults_supported = true;
    parent_gpu->non_replayable_faults_supported = true;
    parent_gpu->access_counters_supported = true;
    parent_gpu->access_counters_can_use_physical_addresses = false;
    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

    // Clear-faulted is done through the SW method, not the HW method.
    parent_gpu->has_clear_faulted_channel_sw_method = true;
    parent_gpu->has_clear_faulted_channel_method = false;

    // Remaining feature flags.
    parent_gpu->smc.supported = true;
    parent_gpu->sparse_mappings_supported = true;
    parent_gpu->map_remap_larger_page_promotion = false;
    parent_gpu->plc_supported = true;
    parent_gpu->no_ats_range_required = true;
}
|
||||
122
kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.c
Normal file
122
kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.c
Normal file
@@ -0,0 +1,122 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2023-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "hwref/blackwell/gb100/dev_fault.h"
|
||||
#include "clc369.h"
|
||||
|
||||
// NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE fault type is deprecated on
|
||||
// Blackwell.
|
||||
uvm_fault_type_t uvm_hal_blackwell_fault_buffer_get_fault_type(const NvU32 *fault_entry)
|
||||
{
|
||||
NvU32 hw_fault_type_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, FAULT_TYPE);
|
||||
|
||||
switch (hw_fault_type_value) {
|
||||
case NV_PFAULT_FAULT_TYPE_PDE:
|
||||
return UVM_FAULT_TYPE_INVALID_PDE;
|
||||
case NV_PFAULT_FAULT_TYPE_PTE:
|
||||
return UVM_FAULT_TYPE_INVALID_PTE;
|
||||
case NV_PFAULT_FAULT_TYPE_RO_VIOLATION:
|
||||
return UVM_FAULT_TYPE_WRITE;
|
||||
case NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION:
|
||||
return UVM_FAULT_TYPE_ATOMIC;
|
||||
case NV_PFAULT_FAULT_TYPE_WO_VIOLATION:
|
||||
return UVM_FAULT_TYPE_READ;
|
||||
|
||||
case NV_PFAULT_FAULT_TYPE_PDE_SIZE:
|
||||
return UVM_FAULT_TYPE_PDE_SIZE;
|
||||
case NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION:
|
||||
return UVM_FAULT_TYPE_VA_LIMIT_VIOLATION;
|
||||
case NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK:
|
||||
return UVM_FAULT_TYPE_UNBOUND_INST_BLOCK;
|
||||
case NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION:
|
||||
return UVM_FAULT_TYPE_PRIV_VIOLATION;
|
||||
case NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION:
|
||||
return UVM_FAULT_TYPE_PITCH_MASK_VIOLATION;
|
||||
case NV_PFAULT_FAULT_TYPE_WORK_CREATION:
|
||||
return UVM_FAULT_TYPE_WORK_CREATION;
|
||||
case NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE:
|
||||
return UVM_FAULT_TYPE_UNSUPPORTED_APERTURE;
|
||||
case NV_PFAULT_FAULT_TYPE_CC_VIOLATION:
|
||||
return UVM_FAULT_TYPE_CC_VIOLATION;
|
||||
case NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND:
|
||||
return UVM_FAULT_TYPE_UNSUPPORTED_KIND;
|
||||
case NV_PFAULT_FAULT_TYPE_REGION_VIOLATION:
|
||||
return UVM_FAULT_TYPE_REGION_VIOLATION;
|
||||
case NV_PFAULT_FAULT_TYPE_POISONED:
|
||||
return UVM_FAULT_TYPE_POISONED;
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(false, "Invalid fault type value: %d\n", hw_fault_type_value);
|
||||
|
||||
return UVM_FAULT_TYPE_COUNT;
|
||||
}
|
||||
|
||||
// Return true when the given HUB client id belongs to a copy engine: either
// one of the HSCE0..HSCE7 clients or one of CE0..CE3.
static bool client_id_ce(NvU16 client_id)
{
    const bool is_hsce = client_id >= NV_PFAULT_CLIENT_HUB_HSCE0 &&
                         client_id <= NV_PFAULT_CLIENT_HUB_HSCE7;

    return is_hsce ||
           client_id == NV_PFAULT_CLIENT_HUB_CE0 ||
           client_id == NV_PFAULT_CLIENT_HUB_CE1 ||
           client_id == NV_PFAULT_CLIENT_HUB_CE2 ||
           client_id == NV_PFAULT_CLIENT_HUB_CE3;
}
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_blackwell_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id)
|
||||
{
|
||||
// Servicing CE and Host (HUB clients) faults.
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
|
||||
if (client_id_ce(client_id)) {
|
||||
UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE19);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_CE;
|
||||
}
|
||||
|
||||
if (client_id == NV_PFAULT_CLIENT_HUB_HOST ||
|
||||
(client_id >= NV_PFAULT_CLIENT_HUB_ESC0 && client_id <= NV_PFAULT_CLIENT_HUB_ESC11)) {
|
||||
UVM_ASSERT((mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44) ||
|
||||
(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS));
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_HOST;
|
||||
}
|
||||
}
|
||||
|
||||
// We shouldn't be servicing faults from any other engines other than GR.
|
||||
UVM_ASSERT_MSG(client_id <= NV_PFAULT_CLIENT_GPC_ROP_3, "Unexpected client ID: 0x%x\n", client_id);
|
||||
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
|
||||
UVM_ASSERT(client_type == UVM_FAULT_CLIENT_TYPE_GPC);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
92
kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.h
Normal file
92
kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.h
Normal file
@@ -0,0 +1,92 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __UVM_HAL_BLACKWELL_FAULT_BUFFER_H__
|
||||
#define __UVM_HAL_BLACKWELL_FAULT_BUFFER_H__
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
// There are up to 10 TPCs per GPC in Blackwell, and there are 2 LTP uTLBs per
|
||||
// TPC. Besides, there is one active RGG uTLB per GPC. Each TPC has a number of
|
||||
// clients that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests
|
||||
// from these units are routed as follows to the 2 LTP uTLBs:
|
||||
//
|
||||
//     --------                    ---------
|
||||
//     | T1_0 | -----------------> | uTLB0 |
|
||||
//     --------                    ---------
|
||||
//
|
||||
//     --------                    ---------
|
||||
//     | T1_1 | -----------------> | uTLB1 |
|
||||
//     --------          --------> ---------
|
||||
//                       |             ^
|
||||
//     -------           |             |
|
||||
//     | PE  | -----------             |
|
||||
//     -------                         |
|
||||
//                                     |
|
||||
//     ---------                       |
|
||||
//     | TPCCS | -----------------------
|
||||
//     ---------
|
||||
//
|
||||
//
|
||||
// The client ids are local to their GPC and the id mapping is linear across
|
||||
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
|
||||
//
|
||||
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
|
||||
// be ignored. These will never be reported in a fault message, and should
|
||||
// never be used in an invalidate. Therefore, we define our own values.
|
||||
// UVM-defined identifiers for the uTLBs within one GPC. Id 0 is the RGG uTLB
// and ids 1..20 are the LTP uTLBs (LTP0..LTP19).
typedef enum {
|
||||
// The RGG uTLB of the GPC.
UVM_BLACKWELL_GPC_UTLB_ID_RGG = 0,
|
||||
// LTP uTLBs, numbered consecutively after the RGG uTLB.
UVM_BLACKWELL_GPC_UTLB_ID_LTP0 = 1,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP1 = 2,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP2 = 3,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP3 = 4,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP4 = 5,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP5 = 6,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP6 = 7,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP7 = 8,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP8 = 9,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP9 = 10,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP10 = 11,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP11 = 12,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP12 = 13,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP13 = 14,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP14 = 15,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP15 = 16,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP16 = 17,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP17 = 18,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP18 = 19,
|
||||
UVM_BLACKWELL_GPC_UTLB_ID_LTP19 = 20,
|
||||
|
||||
// Number of ids defined above (21). uvm_blackwell_get_utlbs_per_gpc() asserts
// that the per-GPC uTLB count does not exceed this value.
UVM_BLACKWELL_GPC_UTLB_COUNT,
|
||||
} uvm_blackwell_gpc_utlb_id_t;
|
||||
|
||||
// Compute the number of uTLBs in each GPC of the given parent GPU: two LTP
// uTLBs for every TPC reported by RM, plus the single RGG uTLB. The result is
// asserted not to exceed UVM_BLACKWELL_GPC_UTLB_COUNT.
static NvU32 uvm_blackwell_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
    const NvU32 utlb_count = 1 + 2 * parent_gpu->rm_info.maxTpcPerGpcCount;

    UVM_ASSERT(utlb_count <= UVM_BLACKWELL_GPC_UTLB_COUNT);

    return utlb_count;
}
|
||||
|
||||
#endif
|
||||
256
kernel-open/nvidia-uvm/uvm_blackwell_host.c
Normal file
256
kernel-open/nvidia-uvm/uvm_blackwell_host.c
Normal file
@@ -0,0 +1,256 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_push_macros.h"
|
||||
#include "clc96f.h"
|
||||
|
||||
// TODO: Bug 3210931: Rename HOST references and files to ESCHED.
|
||||
|
||||
void uvm_hal_blackwell_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE4 is the highest level on Blackwell, see the comment in
|
||||
// uvm_blackwell_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
|
||||
page_table_level = NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
|
||||
|
||||
if (membar != UVM_MEMBAR_NONE)
|
||||
ack_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C96F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
NvU64 actual_base;
|
||||
NvU64 actual_size;
|
||||
NvU64 actual_end;
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
// the passed interval
|
||||
end = base + size - 1;
|
||||
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
|
||||
|
||||
if (log2_invalidation_size == 64) {
|
||||
// Invalidate everything
|
||||
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
|
||||
return;
|
||||
}
|
||||
|
||||
// The hardware aligns the target address down to the invalidation size.
|
||||
actual_size = 1ULL << log2_invalidation_size;
|
||||
actual_base = UVM_ALIGN_DOWN(base, actual_size);
|
||||
actual_end = actual_base + actual_size - 1;
|
||||
UVM_ASSERT(actual_end >= end);
|
||||
|
||||
// The invalidation size field expects log2(invalidation size in 4K), not
|
||||
// log2(invalidation size in bytes)
|
||||
log2_invalidation_size -= 12;
|
||||
|
||||
// Address to invalidate, as a multiple of 4K.
|
||||
base >>= 12;
|
||||
va_lo = base & HWMASK(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
va_hi = base >> HWSIZE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE4 is the highest level on Blackwell, see the comment in
|
||||
// uvm_blackwell_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
|
||||
page_table_level = NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
|
||||
|
||||
if (membar != UVM_MEMBAR_NONE)
|
||||
ack_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C96F, MEM_OP_A, HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
sysmembar_value |
|
||||
HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C96F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
|
||||
// PDE4 is the highest level on Blackwell, see the comment in
|
||||
// uvm_blackwell_mmu.c for details.
|
||||
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde4, params->page_table_level) - 1;
|
||||
}
|
||||
|
||||
if (params->membar != UvmInvalidateTlbMemBarNone)
|
||||
ack_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
invalidate_gpc_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
|
||||
|
||||
if (params->target_va_mode == UvmTargetVaModeTargeted) {
|
||||
NvU64 va = params->va >> 12;
|
||||
|
||||
NvU32 va_lo = va & HWMASK(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
NV_PUSH_4U(C96F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C96F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
invalidate_gpc_value |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C96F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
invalidate_gpc_value |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
}
|
||||
165
kernel-open/nvidia-uvm/uvm_blackwell_mmu.c
Normal file
165
kernel-open/nvidia-uvm/uvm_blackwell_mmu.c
Normal file
@@ -0,0 +1,165 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2022-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
// On Blackwell, the UVM page tree 'depth' maps to hardware as follows:
|
||||
//
|
||||
// UVM depth HW level VA bits
|
||||
// 0 PDE4 56:56
|
||||
// 1 PDE3 55:47
|
||||
// 2 PDE2 (or 256G PTE) 46:38
|
||||
// 3 PDE1 (or 512M PTE) 37:29
|
||||
// 4 PDE0 (dual 64K/4K PDE, or 2M PTE) 28:21
|
||||
// 5 PTE_64K / PTE_4K 20:16 / 20:12
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "uvm_blackwell_fault_buffer.h"
|
||||
#include "hwref/blackwell/gb100/dev_fault.h"
|
||||
#include "hwref/blackwell/gb100/dev_mmu.h"
|
||||
|
||||
static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
|
||||
|
||||
// Return the UVM page tree depth for the given page size, following the depth
// table at the top of this file: 256G -> 2, 512M -> 3, 2M -> 4. Every other
// page size (no validation is done here) yields depth 5.
static NvU32 page_table_depth_blackwell(NvU64 page_size)
{
    if (page_size == UVM_PAGE_SIZE_2M)
        return 4;

    if (page_size == UVM_PAGE_SIZE_512M)
        return 3;

    if (page_size == UVM_PAGE_SIZE_256G)
        return 2;

    return 5;
}
|
||||
|
||||
static NvU64 page_sizes_blackwell(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_256G | UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
// Return the Blackwell MMU mode HAL for the given big page size, or NULL when
// big_page_size is 128K.
//
// The HAL is built lazily on the first call: it starts as a copy of the Hopper
// HAL and then overrides page_table_depth and page_sizes with the Blackwell
// versions. Later calls return the same static instance.
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size)
{
    static bool initialized = false;
    uvm_mmu_mode_hal_t *base_hal;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    // Fast path: the static HAL has already been populated.
    if (initialized)
        return &blackwell_mmu_mode_hal;

    base_hal = uvm_hal_mmu_mode_hopper(big_page_size);
    UVM_ASSERT(base_hal);

    // The assumption made is that arch_hal->mmu_mode_hal() will be called
    // under the global lock the first time, so check it here.
    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    // Inherit everything from Hopper, then override the Blackwell specifics.
    blackwell_mmu_mode_hal = *base_hal;
    blackwell_mmu_mode_hal.page_table_depth = page_table_depth_blackwell;
    blackwell_mmu_mode_hal.page_sizes = page_sizes_blackwell;

    initialized = true;

    return &blackwell_mmu_mode_hal;
}
|
||||
|
||||
// Translate an NV_PFAULT_CLIENT_GPC_* client ID into the corresponding
// UVM_BLACKWELL_GPC_UTLB_ID_* value.
//
// RAST, GCC and GPCCS map to the RGG id. T1_<n> maps to LTP<n>, and PE_<k> and
// TPCCS_<k> share the id of T1_<2k+1>. Any other client ID triggers an
// assertion and yields 0.
NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    NvU16 utlb_id = 0;

    switch (client_id) {
        case NV_PFAULT_CLIENT_GPC_RAST:
        case NV_PFAULT_CLIENT_GPC_GCC:
        case NV_PFAULT_CLIENT_GPC_GPCCS:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_RGG;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_0:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP0;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_1:
        case NV_PFAULT_CLIENT_GPC_PE_0:
        case NV_PFAULT_CLIENT_GPC_TPCCS_0:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP1;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_2:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP2;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_3:
        case NV_PFAULT_CLIENT_GPC_PE_1:
        case NV_PFAULT_CLIENT_GPC_TPCCS_1:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP3;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_4:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP4;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_5:
        case NV_PFAULT_CLIENT_GPC_PE_2:
        case NV_PFAULT_CLIENT_GPC_TPCCS_2:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP5;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_6:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP6;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_7:
        case NV_PFAULT_CLIENT_GPC_PE_3:
        case NV_PFAULT_CLIENT_GPC_TPCCS_3:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP7;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_8:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP8;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_9:
        case NV_PFAULT_CLIENT_GPC_PE_4:
        case NV_PFAULT_CLIENT_GPC_TPCCS_4:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP9;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_10:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP10;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_11:
        case NV_PFAULT_CLIENT_GPC_PE_5:
        case NV_PFAULT_CLIENT_GPC_TPCCS_5:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP11;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_12:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP12;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_13:
        case NV_PFAULT_CLIENT_GPC_PE_6:
        case NV_PFAULT_CLIENT_GPC_TPCCS_6:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP13;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_14:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP14;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_15:
        case NV_PFAULT_CLIENT_GPC_PE_7:
        case NV_PFAULT_CLIENT_GPC_TPCCS_7:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP15;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_16:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP16;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_17:
        case NV_PFAULT_CLIENT_GPC_PE_8:
        case NV_PFAULT_CLIENT_GPC_TPCCS_8:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP17;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_18:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP18;
            break;

        case NV_PFAULT_CLIENT_GPC_T1_19:
        case NV_PFAULT_CLIENT_GPC_PE_9:
        case NV_PFAULT_CLIENT_GPC_TPCCS_9:
            utlb_id = UVM_BLACKWELL_GPC_UTLB_ID_LTP19;
            break;

        default:
            // Unknown client: flag it and return the initial value of 0.
            UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
            break;
    }

    return utlb_id;
}
|
||||
@@ -361,7 +361,6 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
index = uvm_channel_index_in_pool(channel);
|
||||
|
||||
channel_pool_lock(pool);
|
||||
|
||||
@@ -493,25 +492,20 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaphore_va)
|
||||
{
|
||||
NvU32 iv_index;
|
||||
uvm_gpu_address_t notifier_gpu_va;
|
||||
uvm_gpu_address_t auth_tag_gpu_va;
|
||||
uvm_gpu_address_t semaphore_gpu_va;
|
||||
uvm_gpu_address_t encrypted_payload_gpu_va;
|
||||
uvm_gpu_t *gpu = push->gpu;
|
||||
uvm_channel_t *channel = push->channel;
|
||||
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
|
||||
uvm_gpu_address_t notifier_gpu_va = uvm_gpu_semaphore_get_notifier_gpu_va(semaphore);
|
||||
uvm_gpu_address_t auth_tag_gpu_va = uvm_gpu_semaphore_get_auth_tag_gpu_va(semaphore);
|
||||
uvm_gpu_address_t encrypted_payload_gpu_va = uvm_gpu_semaphore_get_encrypted_payload_gpu_va(semaphore);
|
||||
uvm_gpu_address_t semaphore_gpu_va = uvm_gpu_address_virtual(semaphore_va);
|
||||
UvmCslIv *iv_cpu_addr = semaphore->conf_computing.ivs;
|
||||
NvU32 payload_size = sizeof(*semaphore->payload);
|
||||
NvU32 payload_size = sizeof(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
|
||||
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
|
||||
notifier_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.notifier, gpu, false);
|
||||
auth_tag_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.auth_tag, gpu, false);
|
||||
semaphore_gpu_va = uvm_gpu_address_virtual(semaphore_va);
|
||||
|
||||
iv_index = ((*last_pushed_notifier + 2) / 2) % channel->num_gpfifo_entries;
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(channel, &iv_cpu_addr[iv_index]);
|
||||
@@ -1541,14 +1535,14 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NvNotification *errorNotifier;
|
||||
NvNotification *error_notifier;
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
errorNotifier = channel->proxy.channel_info.shadowErrorNotifier;
|
||||
error_notifier = channel->proxy.channel_info.shadowErrorNotifier;
|
||||
else
|
||||
errorNotifier = channel->channel_info.errorNotifier;
|
||||
error_notifier = channel->channel_info.errorNotifier;
|
||||
|
||||
if (errorNotifier->status == 0)
|
||||
if (error_notifier->status == 0)
|
||||
return NV_OK;
|
||||
|
||||
// In case we hit a channel error, check the ECC error notifier as well so
|
||||
@@ -1710,59 +1704,24 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
|
||||
channel->conf_computing.static_pb_protected_sysmem = NULL;
|
||||
channel->conf_computing.push_crypto_bundles = NULL;
|
||||
|
||||
uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.encrypted_payload);
|
||||
uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.notifier);
|
||||
uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.auth_tag);
|
||||
uvm_kvfree(channel->tracking_sem.semaphore.conf_computing.ivs);
|
||||
channel->tracking_sem.semaphore.conf_computing.encrypted_payload = NULL;
|
||||
channel->tracking_sem.semaphore.conf_computing.notifier = NULL;
|
||||
channel->tracking_sem.semaphore.conf_computing.auth_tag = NULL;
|
||||
channel->tracking_sem.semaphore.conf_computing.ivs = NULL;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
sizeof(semaphore->conf_computing.last_pushed_notifier),
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&semaphore->conf_computing.notifier);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
sizeof(*channel->tracking_sem.semaphore.payload),
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&semaphore->conf_computing.encrypted_payload);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&semaphore->conf_computing.auth_tag);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
semaphore->conf_computing.ivs = uvm_kvmalloc_zero(sizeof(*semaphore->conf_computing.ivs)
|
||||
* channel->num_gpfifo_entries);
|
||||
* channel->num_gpfifo_entries);
|
||||
|
||||
if (!semaphore->conf_computing.ivs)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
return status;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
|
||||
@@ -2380,24 +2339,41 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
|
||||
static bool ce_is_usable(const UvmGpuCopyEngineCaps *cap)
|
||||
{
|
||||
if (!cap->supported || cap->grce)
|
||||
return false;
|
||||
return cap->supported && !cap->grce;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case UVM_CHANNEL_TYPE_CPU_TO_GPU:
|
||||
case UVM_CHANNEL_TYPE_GPU_TO_CPU:
|
||||
return cap->sysmem;
|
||||
case UVM_CHANNEL_TYPE_GPU_INTERNAL:
|
||||
case UVM_CHANNEL_TYPE_MEMOPS:
|
||||
return true;
|
||||
case UVM_CHANNEL_TYPE_GPU_TO_GPU:
|
||||
return cap->p2p;
|
||||
default:
|
||||
UVM_ASSERT_MSG(false, "Unexpected channel type 0x%x\n", type);
|
||||
return false;
|
||||
// Check that all asynchronous CEs are usable, and that there is at least one
|
||||
// such CE.
|
||||
static NV_STATUS ces_validate(uvm_channel_manager_t *manager, const UvmGpuCopyEngineCaps *ces_caps)
|
||||
{
|
||||
unsigned ce;
|
||||
bool found_usable_ce = false;
|
||||
|
||||
for (ce = 0; ce < UVM_COPY_ENGINE_COUNT_MAX; ++ce) {
|
||||
const UvmGpuCopyEngineCaps *ce_caps = ces_caps + ce;
|
||||
|
||||
if (!ce_is_usable(ce_caps))
|
||||
continue;
|
||||
|
||||
found_usable_ce = true;
|
||||
|
||||
// All channels may need to release their semaphore to sysmem.
|
||||
// All CEs are expected to have the sysmem flag set.
|
||||
if (!ce_caps->sysmem)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
// While P2P capabilities are only required for transfers between GPUs,
|
||||
// in practice all CEs are expected to have the corresponding flag set.
|
||||
if (!ce_caps->p2p)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (!found_usable_ce)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static unsigned ce_usage_count(NvU32 ce, const unsigned *preferred_ce)
|
||||
@@ -2426,15 +2402,13 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
|
||||
const UvmGpuCopyEngineCaps *cap0 = ce_caps + ce_index0;
|
||||
const UvmGpuCopyEngineCaps *cap1 = ce_caps + ce_index1;
|
||||
|
||||
UVM_ASSERT(ce_usable_for_channel_type(type, cap0));
|
||||
UVM_ASSERT(ce_usable_for_channel_type(type, cap1));
|
||||
UVM_ASSERT(ce_index0 < UVM_COPY_ENGINE_COUNT_MAX);
|
||||
UVM_ASSERT(ce_index1 < UVM_COPY_ENGINE_COUNT_MAX);
|
||||
UVM_ASSERT(ce_index0 != ce_index1);
|
||||
|
||||
switch (type) {
|
||||
// For CPU to GPU fast sysmem read is the most important
|
||||
case UVM_CHANNEL_TYPE_CPU_TO_GPU:
|
||||
// For CPU to GPU fast sysmem read is the most important
|
||||
if (cap0->sysmemRead != cap1->sysmemRead)
|
||||
return cap1->sysmemRead - cap0->sysmemRead;
|
||||
|
||||
@@ -2444,8 +2418,8 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
|
||||
|
||||
break;
|
||||
|
||||
// For GPU to CPU fast sysmem write is the most important
|
||||
case UVM_CHANNEL_TYPE_GPU_TO_CPU:
|
||||
// For GPU to CPU fast sysmem write is the most important
|
||||
if (cap0->sysmemWrite != cap1->sysmemWrite)
|
||||
return cap1->sysmemWrite - cap0->sysmemWrite;
|
||||
|
||||
@@ -2455,8 +2429,8 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
|
||||
|
||||
break;
|
||||
|
||||
// For GPU to GPU prefer the LCE with the most PCEs
|
||||
case UVM_CHANNEL_TYPE_GPU_TO_GPU:
|
||||
// Prefer the LCE with the most PCEs
|
||||
{
|
||||
int pce_diff = (int)hweight32(cap1->cePceMask) - (int)hweight32(cap0->cePceMask);
|
||||
|
||||
@@ -2466,10 +2440,10 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
|
||||
|
||||
break;
|
||||
|
||||
// For GPU_INTERNAL we want the max possible bandwidth for CEs. For now
|
||||
// assume that the number of PCEs is a good measure.
|
||||
// TODO: Bug 1735254: Add a direct CE query for local FB bandwidth
|
||||
case UVM_CHANNEL_TYPE_GPU_INTERNAL:
|
||||
// We want the max possible bandwidth for CEs used for GPU_INTERNAL,
|
||||
// for now assume that the number of PCEs is a good measure.
|
||||
// TODO: Bug 1735254: Add a direct CE query for local FB bandwidth
|
||||
{
|
||||
int pce_diff = (int)hweight32(cap1->cePceMask) - (int)hweight32(cap0->cePceMask);
|
||||
|
||||
@@ -2483,11 +2457,15 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
|
||||
|
||||
break;
|
||||
|
||||
// For MEMOPS we mostly care about latency which should be better with
|
||||
// less used CEs (although we only know about our own usage and not
|
||||
// system-wide) so just break out to get the default ordering which
|
||||
// prioritizes usage count.
|
||||
case UVM_CHANNEL_TYPE_MEMOPS:
|
||||
// For MEMOPS we mostly care about latency which should be better
|
||||
// with less used CEs (although we only know about our own usage and
|
||||
// not system-wide) so just break out to get the default ordering
|
||||
// which prioritizes usage count.
|
||||
// For WLC we only care about using a dedicated CE, which requires
|
||||
// knowing the global CE mappings. For now just rely on the default
|
||||
// ordering, which results in selecting an unused CE (if available).
|
||||
case UVM_CHANNEL_TYPE_WLC:
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -2510,54 +2488,104 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
|
||||
return ce_index0 - ce_index1;
|
||||
}
|
||||
|
||||
// Identify usable CEs, and select the preferred CE for a given channel type.
|
||||
static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
|
||||
const UvmGpuCopyEngineCaps *ce_caps,
|
||||
uvm_channel_type_t type,
|
||||
unsigned *preferred_ce)
|
||||
// Select the preferred CE for the given channel types.
|
||||
static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
|
||||
const UvmGpuCopyEngineCaps *ce_caps,
|
||||
uvm_channel_type_t *channel_types,
|
||||
unsigned num_channel_types,
|
||||
unsigned *preferred_ce)
|
||||
{
|
||||
NvU32 i;
|
||||
NvU32 best_ce = UVM_COPY_ENGINE_COUNT_MAX;
|
||||
unsigned i;
|
||||
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_CE_COUNT);
|
||||
// In Confidential Computing, do not mark all usable CEs, only the preferred
|
||||
// ones, because non-preferred CE channels are guaranteed to not be used.
|
||||
bool mark_all_usable_ces = !g_uvm_global.conf_computing_enabled;
|
||||
|
||||
for (i = 0; i < UVM_COPY_ENGINE_COUNT_MAX; ++i) {
|
||||
const UvmGpuCopyEngineCaps *cap = ce_caps + i;
|
||||
for (i = 0; i < num_channel_types; ++i) {
|
||||
unsigned ce;
|
||||
unsigned best_ce = UVM_COPY_ENGINE_COUNT_MAX;
|
||||
uvm_channel_type_t type = channel_types[i];
|
||||
|
||||
if (!ce_usable_for_channel_type(type, cap))
|
||||
continue;
|
||||
for (ce = 0; ce < UVM_COPY_ENGINE_COUNT_MAX; ++ce) {
|
||||
if (!ce_is_usable(ce_caps + ce))
|
||||
continue;
|
||||
|
||||
__set_bit(i, manager->ce_mask);
|
||||
if (mark_all_usable_ces)
|
||||
__set_bit(ce, manager->ce_mask);
|
||||
|
||||
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
best_ce = i;
|
||||
continue;
|
||||
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
best_ce = ce;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (compare_ce_for_channel_type(ce_caps, type, ce, best_ce, preferred_ce) < 0)
|
||||
best_ce = ce;
|
||||
}
|
||||
|
||||
if (compare_ce_for_channel_type(ce_caps, type, i, best_ce, preferred_ce) < 0)
|
||||
best_ce = i;
|
||||
}
|
||||
UVM_ASSERT(best_ce != UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
UVM_ERR_PRINT("Failed to find a suitable CE for channel type %s\n", uvm_channel_type_to_string(type));
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
preferred_ce[type] = best_ce;
|
||||
|
||||
preferred_ce[type] = best_ce;
|
||||
return NV_OK;
|
||||
// Preferred CEs are always marked as usable.
|
||||
if (type < UVM_CHANNEL_TYPE_CE_COUNT)
|
||||
__set_bit(best_ce, manager->ce_mask);
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_pick_copy_engines(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
static void pick_ces(uvm_channel_manager_t *manager, const UvmGpuCopyEngineCaps *ce_caps, unsigned *preferred_ce)
|
||||
{
|
||||
NV_STATUS status;
|
||||
unsigned i;
|
||||
UvmGpuCopyEnginesCaps *ces_caps;
|
||||
// The order of picking CEs for each type matters as it's affected by
|
||||
// the usage count of each CE and it increases every time a CE
|
||||
// is selected. MEMOPS has the least priority as it only cares about
|
||||
// low usage of the CE to improve latency
|
||||
uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
|
||||
UVM_CHANNEL_TYPE_GPU_TO_CPU,
|
||||
UVM_CHANNEL_TYPE_GPU_INTERNAL,
|
||||
UVM_CHANNEL_TYPE_GPU_TO_GPU,
|
||||
UVM_CHANNEL_TYPE_MEMOPS};
|
||||
|
||||
UVM_ASSERT(!g_uvm_global.conf_computing_enabled);
|
||||
|
||||
pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
|
||||
}
|
||||
|
||||
static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
|
||||
const UvmGpuCopyEngineCaps *ce_caps,
|
||||
unsigned *preferred_ce)
|
||||
{
|
||||
unsigned best_wlc_ce;
|
||||
|
||||
// The WLC type must go last so an unused CE is chosen, if available
|
||||
uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
|
||||
UVM_CHANNEL_TYPE_GPU_TO_CPU,
|
||||
UVM_CHANNEL_TYPE_GPU_INTERNAL,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
UVM_CHANNEL_TYPE_WLC};
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
|
||||
|
||||
// Direct transfers between GPUs are disallowed in Confidential Computing,
|
||||
// but the preferred CE is still set to an arbitrary value for consistency.
|
||||
preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
|
||||
best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];
|
||||
|
||||
// TODO: Bug 4576908: in HCC, the WLC type should not share a CE with any
|
||||
// channel type other than LCIC. The assertion should be a check instead.
|
||||
UVM_ASSERT(ce_usage_count(best_wlc_ce, preferred_ce) == 0);
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_pick_ces(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
NV_STATUS status;
|
||||
UvmGpuCopyEnginesCaps *ces_caps;
|
||||
uvm_channel_type_t type;
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_COUNT; type++)
|
||||
preferred_ce[type] = UVM_COPY_ENGINE_COUNT_MAX;
|
||||
|
||||
ces_caps = uvm_kvmalloc_zero(sizeof(*ces_caps));
|
||||
if (!ces_caps)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
@@ -2566,16 +2594,14 @@ static NV_STATUS channel_manager_pick_copy_engines(uvm_channel_manager_t *manage
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
// The order of picking CEs for each type matters as it's affected by the
|
||||
// usage count of each CE and it increases every time a CE is selected.
|
||||
// MEMOPS has the least priority as it only cares about low usage of the
|
||||
// CE to improve latency
|
||||
for (i = 0; i < ARRAY_SIZE(types); ++i) {
|
||||
status = pick_ce_for_channel_type(manager, ces_caps->copyEngineCaps, types[i], preferred_ce);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
}
|
||||
status = ces_validate(manager, ces_caps->copyEngineCaps);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
pick_ces_conf_computing(manager, ces_caps->copyEngineCaps, preferred_ce);
|
||||
else
|
||||
pick_ces(manager, ces_caps->copyEngineCaps, preferred_ce);
|
||||
out:
|
||||
uvm_kvfree(ces_caps);
|
||||
|
||||
@@ -2584,16 +2610,18 @@ out:
|
||||
|
||||
// Return the pool corresponding to the given CE index
|
||||
//
|
||||
// This function cannot be used to access the proxy pool in SR-IOV heavy.
|
||||
// Used to retrieve pools of type UVM_CHANNEL_POOL_TYPE_CE only.
|
||||
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_channel_pool_t *pool = uvm_channel_pool_first(manager, UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
UVM_ASSERT(pool != NULL);
|
||||
UVM_ASSERT(test_bit(ce, manager->ce_mask));
|
||||
|
||||
// The index of the pool associated with 'ce' is the number of usable CEs
|
||||
// in [0, ce)
|
||||
pool = manager->channel_pools + bitmap_weight(manager->ce_mask, ce);
|
||||
// Pools of type UVM_CHANNEL_POOL_TYPE_CE are stored contiguously. The
|
||||
// offset of the pool associated with 'ce' is the number of usable CEs in
|
||||
// [0, ce).
|
||||
pool += bitmap_weight(manager->ce_mask, ce);
|
||||
|
||||
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
UVM_ASSERT(pool->engine_index == ce);
|
||||
@@ -2639,7 +2667,7 @@ static const char *buffer_location_to_string(UVM_BUFFER_LOCATION loc)
|
||||
else if (loc == UVM_BUFFER_LOCATION_DEFAULT)
|
||||
return "auto";
|
||||
|
||||
UVM_ASSERT_MSG(false, "Invalid buffer locationvalue %d\n", loc);
|
||||
UVM_ASSERT_MSG(false, "Invalid buffer location value %d\n", loc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2811,25 +2839,30 @@ static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
unsigned ce;
|
||||
unsigned type;
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as
|
||||
// the preferred CE for any type, because as more information is discovered
|
||||
// (for example, a pair of peer GPUs is added) we may start using the
|
||||
// previously idle pools.
|
||||
// previously idle pools. Configurations where non-preferred CEs are
|
||||
// guaranteed to remain unused are allowed to avoid marking those engines as
|
||||
// usable.
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
NV_STATUS status;
|
||||
unsigned type;
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
// Set pool type if it hasn't been set before.
|
||||
if (preferred_ce[type] == ce && manager->pool_to_use.default_for_type[type] == NULL)
|
||||
manager->pool_to_use.default_for_type[type] = pool;
|
||||
}
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
// Avoid overwriting previously set defaults.
|
||||
if (manager->pool_to_use.default_for_type[type] != NULL)
|
||||
continue;
|
||||
|
||||
ce = preferred_ce[type];
|
||||
manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
@@ -3000,17 +3033,15 @@ static NV_STATUS setup_lcic_schedule(uvm_channel_t *paired_wlc, uvm_channel_t *l
|
||||
// Reuse WLC sysmem allocation
|
||||
NvU64 gpu_unprotected = uvm_rm_mem_get_gpu_uvm_va(paired_wlc->conf_computing.static_pb_unprotected_sysmem, gpu);
|
||||
char *cpu_unprotected = paired_wlc->conf_computing.static_pb_unprotected_sysmem_cpu;
|
||||
uvm_gpu_semaphore_t *lcic_gpu_semaphore = &lcic->tracking_sem.semaphore;
|
||||
|
||||
uvm_gpu_semaphore_t *lcic_semaphore = &lcic->tracking_sem.semaphore;
|
||||
uvm_gpu_address_t notifier_src_entry_addr = lcic->conf_computing.static_notifier_entry_unprotected_sysmem_gpu_va;
|
||||
uvm_gpu_address_t notifier_src_exit_addr = lcic->conf_computing.static_notifier_exit_unprotected_sysmem_gpu_va;
|
||||
uvm_gpu_address_t notifier_dst_addr = uvm_rm_mem_get_gpu_va(lcic_gpu_semaphore->conf_computing.notifier,
|
||||
gpu,
|
||||
false);
|
||||
uvm_gpu_address_t encrypted_payload_gpu_va =
|
||||
uvm_rm_mem_get_gpu_va(lcic_gpu_semaphore->conf_computing.encrypted_payload, gpu, false);
|
||||
uvm_gpu_address_t notifier_dst_addr = uvm_gpu_semaphore_get_notifier_gpu_va(lcic_semaphore);
|
||||
uvm_gpu_address_t encrypted_payload_gpu_va = uvm_gpu_semaphore_get_encrypted_payload_gpu_va(lcic_semaphore);
|
||||
uvm_gpu_address_t auth_tag_gpu_va = uvm_gpu_semaphore_get_auth_tag_gpu_va(lcic_semaphore);
|
||||
uvm_gpu_address_t semaphore_gpu_va = uvm_gpu_address_virtual(uvm_channel_tracking_semaphore_get_gpu_va(lcic));
|
||||
uvm_gpu_address_t auth_tag_gpu_va = uvm_rm_mem_get_gpu_va(lcic_gpu_semaphore->conf_computing.auth_tag, gpu, false);
|
||||
NvU32 payload_size = sizeof(*lcic->tracking_sem.semaphore.payload);
|
||||
NvU32 payload_size = sizeof(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(lcic_semaphore));
|
||||
NvU32 notifier_size = sizeof(*lcic->conf_computing.static_notifier_entry_unprotected_sysmem_cpu);
|
||||
|
||||
NvU64 *lcic_gpfifo_entries;
|
||||
@@ -3189,12 +3220,8 @@ static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager
|
||||
|
||||
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
|
||||
|
||||
// Use the same CE as CPU TO GPU channels for WLC/LCIC
|
||||
// Both need to use the same engine for the fixed schedule to work.
|
||||
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
|
||||
// work launch
|
||||
// Find a metric to select the best CE to use
|
||||
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
|
||||
// WLC and LCIC must use the same engine for the fixed schedule to work.
|
||||
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_WLC];
|
||||
|
||||
// Create WLC/LCIC pools. This should be done early, CE channels use
|
||||
// them for secure launch. The WLC pool must be created before the LCIC.
|
||||
@@ -3223,14 +3250,10 @@ static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager
|
||||
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_channel_type_t type;
|
||||
unsigned max_channel_pools;
|
||||
unsigned preferred_ce[UVM_CHANNEL_TYPE_CE_COUNT];
|
||||
unsigned preferred_ce[UVM_CHANNEL_TYPE_COUNT];
|
||||
|
||||
for (type = 0; type < ARRAY_SIZE(preferred_ce); type++)
|
||||
preferred_ce[type] = UVM_COPY_ENGINE_COUNT_MAX;
|
||||
|
||||
status = channel_manager_pick_copy_engines(manager, preferred_ce);
|
||||
status = channel_manager_pick_ces(manager, preferred_ce);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@@ -3491,7 +3514,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "get %u\n", channel->gpu_get);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "put %u\n", channel->cpu_put);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)uvm_gpu_semaphore_get_cpu_va(&channel->tracking_sem.semaphore));
|
||||
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
@@ -418,7 +418,7 @@ struct uvm_channel_manager_struct
|
||||
unsigned num_channel_pools;
|
||||
|
||||
// Mask containing the indexes of the usable Copy Engines. Each usable CE
|
||||
// has at least one pool associated with it.
|
||||
// has at least one pool of type UVM_CHANNEL_POOL_TYPE_CE associated with it
|
||||
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
struct
|
||||
|
||||
@@ -340,9 +340,9 @@ static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
|
||||
if (!domain || !iommu_is_dma_domain(domain))
|
||||
return NV_OK;
|
||||
|
||||
// Only run if ATS is enabled. Otherwise the CE doesn't get response on
|
||||
// writing to unmapped location.
|
||||
if (!g_uvm_global.ats.enabled)
|
||||
// Only run if ATS is enabled with 64kB base page.
|
||||
// Otherwise the CE doesn't get a response when writing to an unmapped location.
|
||||
if (!g_uvm_global.ats.enabled || PAGE_SIZE != UVM_PAGE_SIZE_64K)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(data_size, NULL, &sysmem);
|
||||
@@ -691,12 +691,16 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
|
||||
if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
|
||||
NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
|
||||
uvm_test_stream_t *random_stream = &streams[random_stream_index];
|
||||
uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
|
||||
snapshot_counter(&stream->push,
|
||||
random_stream->counter_mem,
|
||||
stream->other_stream_counter_snapshots_mem,
|
||||
i,
|
||||
random_stream->queued_counter_repeat);
|
||||
|
||||
if ((random_stream->push.gpu == gpu) || uvm_push_allow_dependencies_across_gpus()) {
|
||||
uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
|
||||
|
||||
snapshot_counter(&stream->push,
|
||||
random_stream->counter_mem,
|
||||
stream->other_stream_counter_snapshots_mem,
|
||||
i,
|
||||
random_stream->queued_counter_repeat);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_push_end(&stream->push);
|
||||
@@ -789,7 +793,7 @@ done:
|
||||
// This test verifies that concurrent pushes using the same channel pool
|
||||
// select different channels, when the Confidential Computing feature is
|
||||
// enabled.
|
||||
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_channel_pool_t *pool;
|
||||
@@ -849,7 +853,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
|
||||
static NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
@@ -944,7 +948,7 @@ release:
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
@@ -983,7 +987,7 @@ NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
@@ -1031,7 +1035,7 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
Copyright (c) 2013-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -423,7 +423,9 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *
|
||||
UVM_ASSERT(first);
|
||||
UVM_ASSERT(outer);
|
||||
|
||||
if (uvm_platform_uses_canonical_form_address()) {
|
||||
// Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
|
||||
// even when plugged into platforms using it.
|
||||
if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
|
||||
*first = 1ULL << (num_va_bits - 1);
|
||||
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
|
||||
}
|
||||
|
||||
@@ -469,6 +469,7 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
size,
|
||||
(const NvU8 *) src_cipher,
|
||||
src_iv,
|
||||
NV_U32_MAX,
|
||||
(NvU8 *) dst_plain,
|
||||
NULL,
|
||||
0,
|
||||
@@ -485,6 +486,8 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU8 valid)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
|
||||
|
||||
// There is no dedicated lock for the CSL context associated with replayable
|
||||
// faults. The mutual exclusion required by the RM CSL API is enforced by
|
||||
@@ -494,36 +497,48 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
|
||||
status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);
|
||||
|
||||
// Informing RM of an encryption/decryption should not fail
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
status = nvUvmInterfaceCslDecrypt(csl_context,
|
||||
fault_entry_size,
|
||||
(const NvU8 *) src_cipher,
|
||||
NULL,
|
||||
NV_U32_MAX,
|
||||
(NvU8 *) dst_plain,
|
||||
&valid,
|
||||
sizeof(valid),
|
||||
(const NvU8 *) auth_tag_buffer);
|
||||
|
||||
if (status != NV_OK)
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
|
||||
|
||||
// See comment in uvm_conf_computing_fault_decrypt
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
|
||||
UVM_CSL_OPERATION_DECRYPT,
|
||||
increment,
|
||||
NULL);
|
||||
status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);
|
||||
|
||||
// Informing RM of an encryption/decryption should not fail
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
status = nvUvmInterfaceCslIncrementIv(csl_context, UVM_CSL_OPERATION_DECRYPT, 1, NULL);
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
@@ -191,12 +191,12 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU8 valid);
|
||||
|
||||
// Increment the CPU-side decrypt IV of the CSL context associated with
|
||||
// replayable faults. The function is a no-op if the given increment is zero.
|
||||
// replayable faults.
|
||||
//
|
||||
// The IV associated with a fault CSL context is a 64-bit counter.
|
||||
//
|
||||
// Locking: this function must be invoked while holding the replayable ISR lock.
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Query the number of remaining messages before IV needs to be rotated.
|
||||
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
|
||||
|
||||
@@ -51,8 +51,10 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
if (uvm_processor_mask_empty(retained_gpus))
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
if (uvm_processor_mask_empty(retained_gpus)) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < params->iterations; i++) {
|
||||
if (fatal_signal_pending(current)) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -119,10 +119,6 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
if (memory_owning_gpu == NULL)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
// TODO: Bug 1903234: Once RM supports indirect peer mappings, we'll need to
|
||||
// update this test since the aperture will be SYS. Depending on how
|
||||
// RM implements things, we might not be able to compare the physical
|
||||
// addresses either.
|
||||
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
|
||||
|
||||
if (is_cacheable(ext_mapping_info, aperture))
|
||||
|
||||
@@ -409,4 +409,10 @@ NV_STATUS uvm_service_block_context_init(void);
|
||||
// Release fault service contexts if any exist.
|
||||
void uvm_service_block_context_exit(void);
|
||||
|
||||
// Allocate a service block context
|
||||
uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm);
|
||||
|
||||
// Free a service block context
|
||||
void uvm_service_block_context_free(uvm_service_block_context_t *service_context);
|
||||
|
||||
#endif // __UVM_GLOBAL_H__
|
||||
|
||||
@@ -81,6 +81,8 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
return UVM_GPU_LINK_NVLINK_3;
|
||||
case UVM_LINK_TYPE_NVLINK_4:
|
||||
return UVM_GPU_LINK_NVLINK_4;
|
||||
case UVM_LINK_TYPE_NVLINK_5:
|
||||
return UVM_GPU_LINK_NVLINK_5;
|
||||
case UVM_LINK_TYPE_C2C:
|
||||
return UVM_GPU_LINK_C2C;
|
||||
default:
|
||||
@@ -218,8 +220,9 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
|
||||
UVM_ASSERT(gpu_address_space_info.bigPageSize <= NV_U32_MAX);
|
||||
|
||||
gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
|
||||
gpu->time.time0_register = gpu_address_space_info.time0Offset;
|
||||
gpu->time.time1_register = gpu_address_space_info.time1Offset;
|
||||
|
||||
@@ -458,7 +461,8 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
|
||||
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 8);
|
||||
|
||||
switch (link_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
|
||||
@@ -467,6 +471,7 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_5);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
@@ -1082,9 +1087,6 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
gpu->parent->rm_va_size,
|
||||
va_per_entry);
|
||||
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
|
||||
|
||||
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
|
||||
tree_alloc->addr.address,
|
||||
@@ -1680,12 +1682,9 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
// TODO: Bug 2008200: Add and remove the GPU in a more reasonable spot.
|
||||
uvm_conf_computing_gpu_deinit(gpu);
|
||||
|
||||
// TODO: Bug 2844714: If the parent is not being freed, the following
|
||||
// gpu_table_lock is only needed to protect concurrent
|
||||
// find_first_valid_gpu() in BH from the __clear_bit here. After
|
||||
// find_first_valid_gpu() is removed, gpu_table_lock should only be acquired
|
||||
// and released in the free_parent case.
|
||||
//
|
||||
// If the parent is not being freed, the following gpu_table_lock is only
|
||||
// needed to protect concurrent uvm_parent_gpu_find_first_valid_gpu() in BH
|
||||
// from the __clear_bit here.
|
||||
// In the free_parent case, gpu_table_lock protects the top half from the
|
||||
// uvm_global_remove_parent_gpu()
|
||||
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
|
||||
@@ -2263,18 +2262,6 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
|
||||
ce0 = p2p_caps_params->optimalNvlinkWriteCEs[sorted ? 0 : 1];
|
||||
ce1 = p2p_caps_params->optimalNvlinkWriteCEs[sorted ? 1 : 0];
|
||||
|
||||
// Indirect peers communicate through the CPU, so the optimal CE
|
||||
// should match the one selected for writing to system memory
|
||||
if (peer_caps->is_indirect_peer) {
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
pool = gpu0->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
UVM_ASSERT(ce0 == pool->engine_index);
|
||||
|
||||
pool = gpu1->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
UVM_ASSERT(ce1 == pool->engine_index);
|
||||
}
|
||||
|
||||
uvm_channel_manager_set_p2p_ce(gpu0->channel_manager, gpu1, ce0);
|
||||
uvm_channel_manager_set_p2p_ce(gpu1->channel_manager, gpu0, ce1);
|
||||
}
|
||||
@@ -2364,74 +2351,51 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|| peer_caps->link_type == UVM_GPU_LINK_C2C
|
||||
)
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID || peer_caps->link_type == UVM_GPU_LINK_C2C)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params->totalLinkLineRateMBps;
|
||||
|
||||
// Initialize peer ids and establish peer mappings
|
||||
peer_caps->is_indirect_peer = (p2p_caps_params->indirectAccess == NV_TRUE);
|
||||
// Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
|
||||
peer_caps->peer_ids[0] = p2p_caps_params->peerIds[0];
|
||||
|
||||
if (peer_caps->is_indirect_peer) {
|
||||
UVM_ASSERT(gpu0->mem_info.numa.enabled);
|
||||
UVM_ASSERT(gpu1->mem_info.numa.enabled);
|
||||
// Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
|
||||
peer_caps->peer_ids[1] = p2p_caps_params->peerIds[1];
|
||||
|
||||
status = uvm_pmm_gpu_indirect_peer_init(&gpu0->pmm, gpu1);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
// Establish peer mappings from each GPU to the other.
|
||||
status = uvm_mmu_create_peer_identity_mappings(gpu0, gpu1);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_pmm_gpu_indirect_peer_init(&gpu1->pmm, gpu0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
status = uvm_mmu_create_peer_identity_mappings(gpu1, gpu0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
|
||||
UVM_ASSERT(peer_caps->total_link_line_rate_mbyte_per_s == 0);
|
||||
}
|
||||
else {
|
||||
// Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
|
||||
peer_caps->peer_ids[0] = p2p_caps_params->peerIds[0];
|
||||
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
|
||||
|
||||
// Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
|
||||
peer_caps->peer_ids[1] = p2p_caps_params->peerIds[1];
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
|
||||
// Establish peer mappings from each GPU to the other. Indirect peers
|
||||
// do not require identity mappings since they use sysmem aperture to
|
||||
// communicate.
|
||||
status = uvm_mmu_create_peer_identity_mappings(gpu0, gpu1);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
// In the case of NVLINK peers, this initialization will happen during
|
||||
// add_gpu. As soon as the peer info table is assigned below, the access
|
||||
// counter bottom half could start operating on the GPU being newly
|
||||
// added and inspecting the peer caps, so all of the appropriate
|
||||
// initialization must happen before this point.
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
|
||||
status = uvm_mmu_create_peer_identity_mappings(gpu1, gpu0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
|
||||
|
||||
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
|
||||
|
||||
// In the case of NVLINK peers, this initialization will happen during
|
||||
// add_gpu. As soon as the peer info table is assigned below, the access
|
||||
// counter bottom half could start operating on the GPU being newly
|
||||
// added and inspecting the peer caps, so all of the appropriate
|
||||
// initialization must happen before this point.
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
|
||||
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
|
||||
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
|
||||
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
|
||||
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
}
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
|
||||
return init_procfs_peer_files(gpu0, gpu1);
|
||||
}
|
||||
@@ -2499,7 +2463,6 @@ static NV_STATUS enable_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
goto cleanup;
|
||||
|
||||
// Sanity checks
|
||||
UVM_ASSERT(p2p_caps_params.indirectAccess == NV_FALSE);
|
||||
UVM_ASSERT(p2p_caps_params.p2pLink == UVM_LINK_TYPE_PCIE);
|
||||
|
||||
status = init_peer_access(gpu0, gpu1, &p2p_caps_params, peer_caps);
|
||||
@@ -2529,29 +2492,26 @@ static NV_STATUS enable_nvlink_peer_access(uvm_gpu_t *gpu0,
|
||||
UVM_ASSERT(peer_caps->ref_count == 0);
|
||||
peer_caps->ref_count = 1;
|
||||
|
||||
if (!p2p_caps_params->indirectAccess) {
|
||||
// Create P2P object for direct NVLink peers
|
||||
status = create_p2p_object(gpu0, gpu1, &p2p_handle);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("failed to create a P2P object with error: %s, for GPU1:%s and GPU2:%s \n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu0),
|
||||
uvm_gpu_name(gpu1));
|
||||
return status;
|
||||
}
|
||||
|
||||
UVM_ASSERT(p2p_handle != 0);
|
||||
|
||||
// Store the handle in the global table.
|
||||
peer_caps->p2p_handle = p2p_handle;
|
||||
|
||||
// Update p2p caps after p2p object creation as it generates the peer
|
||||
// ids
|
||||
status = get_p2p_caps(gpu0, gpu1, p2p_caps_params);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
// Create P2P object for direct NVLink peers
|
||||
status = create_p2p_object(gpu0, gpu1, &p2p_handle);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("failed to create a P2P object with error: %s, for GPU1:%s and GPU2:%s \n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu0),
|
||||
uvm_gpu_name(gpu1));
|
||||
return status;
|
||||
}
|
||||
|
||||
UVM_ASSERT(p2p_handle != 0);
|
||||
|
||||
// Store the handle in the global table.
|
||||
peer_caps->p2p_handle = p2p_handle;
|
||||
|
||||
// Update p2p caps after p2p object creation as it generates the peer ids.
|
||||
status = get_p2p_caps(gpu0, gpu1, p2p_caps_params);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
|
||||
status = init_peer_access(gpu0, gpu1, p2p_caps_params, peer_caps);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
@@ -2586,11 +2546,6 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
|
||||
if (p2p_caps_params.p2pLink == UVM_LINK_TYPE_NONE || p2p_caps_params.p2pLink == UVM_LINK_TYPE_PCIE)
|
||||
continue;
|
||||
|
||||
// Indirect peers are only supported when onlined as NUMA nodes, because
|
||||
// we want to use vm_insert_page and dma_map_page.
|
||||
if (p2p_caps_params.indirectAccess && (!gpu->mem_info.numa.enabled || !other_gpu->mem_info.numa.enabled))
|
||||
continue;
|
||||
|
||||
status = enable_nvlink_peer_access(gpu, other_gpu, &p2p_caps_params);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
@@ -2679,32 +2634,25 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
deinit_procfs_peer_cap_files(peer_caps);
|
||||
|
||||
p2p_handle = peer_caps->p2p_handle;
|
||||
UVM_ASSERT(p2p_handle);
|
||||
|
||||
if (peer_caps->is_indirect_peer) {
|
||||
uvm_pmm_gpu_indirect_peer_destroy(&gpu0->pmm, gpu1);
|
||||
uvm_pmm_gpu_indirect_peer_destroy(&gpu1->pmm, gpu0);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(p2p_handle);
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
|
||||
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));
|
||||
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
}
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
|
||||
// Flush the access counter buffer to avoid getting stale notifications for
|
||||
// accesses to GPUs to which peer access is being disabled. This is also
|
||||
@@ -2744,10 +2692,6 @@ static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_
|
||||
{
|
||||
size_t peer_index;
|
||||
|
||||
// Indirect peers are accessed as sysmem addresses
|
||||
if (peer_caps->is_indirect_peer)
|
||||
return UVM_APERTURE_SYS;
|
||||
|
||||
// MIG instances in the same physical GPU have vidmem addresses
|
||||
if (local_gpu->parent == remote_gpu->parent)
|
||||
return UVM_APERTURE_VID;
|
||||
@@ -2798,6 +2742,7 @@ uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_p
|
||||
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
|
||||
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
|
||||
UVM_ASSERT(other_gpu);
|
||||
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
|
||||
|
||||
if (uvm_gpus_are_nvswitch_connected(gpu, other_gpu)) {
|
||||
// NVSWITCH connected systems use an extended physical address to
|
||||
@@ -2834,7 +2779,7 @@ static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
|
||||
|
||||
// Instance pointers must be 4k aligned and they must have either VID or SYS
|
||||
// apertures. Compress them as much as we can both to guarantee that the key
|
||||
// fits within 64 bits, and to make the table as shallow as possible.
|
||||
// fits within 64 bits, and to make the key space as small as possible.
|
||||
UVM_ASSERT(IS_ALIGNED(instance_ptr.address, UVM_PAGE_SIZE_4K));
|
||||
UVM_ASSERT(instance_ptr.aperture == UVM_APERTURE_VID || instance_ptr.aperture == UVM_APERTURE_SYS);
|
||||
|
||||
@@ -2851,7 +2796,7 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
|
||||
uvm_rb_tree_node_t *channel_tree_node;
|
||||
uvm_user_channel_subctx_info_t *channel_subctx_info;
|
||||
uvm_user_channel_subctx_info_t *new_channel_subctx_info = NULL;
|
||||
uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;
|
||||
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
|
||||
|
||||
if (!user_channel->in_subctx)
|
||||
return NV_OK;
|
||||
@@ -2895,21 +2840,21 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
|
||||
|
||||
user_channel->subctx_info = channel_subctx_info;
|
||||
|
||||
// Register the VA space of the channel subcontext info descriptor, or
|
||||
// Register the GPU VA space of the channel subcontext info descriptor, or
|
||||
// check that the existing one matches the channel's
|
||||
if (channel_subctx_info->subctxs[user_channel->subctx_id].refcount++ > 0) {
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space == va_space,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space 0x%llx but got 0x%llx instead\n",
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == gpu_va_space,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected GPU VA space 0x%llx but got 0x%llx instead\n",
|
||||
user_channel->hw_runlist_id,
|
||||
user_channel->hw_channel_id,
|
||||
instance_ptr.address,
|
||||
uvm_aperture_string(instance_ptr.aperture),
|
||||
user_channel->subctx_id,
|
||||
user_channel->tsg.id,
|
||||
(NvU64)va_space,
|
||||
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].va_space);
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space != NULL,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: VA space is NULL\n",
|
||||
(NvU64)gpu_va_space,
|
||||
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space != NULL,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: GPU VA space is NULL\n",
|
||||
user_channel->hw_runlist_id,
|
||||
user_channel->hw_channel_id,
|
||||
instance_ptr.address,
|
||||
@@ -2926,17 +2871,17 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
|
||||
user_channel->tsg.id);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space == NULL,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space NULL but got 0x%llx instead\n",
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == NULL,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected GPU VA space NULL but got 0x%llx instead\n",
|
||||
user_channel->hw_runlist_id,
|
||||
user_channel->hw_channel_id,
|
||||
instance_ptr.address,
|
||||
uvm_aperture_string(instance_ptr.aperture),
|
||||
user_channel->subctx_id,
|
||||
user_channel->tsg.id,
|
||||
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].va_space);
|
||||
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
|
||||
|
||||
channel_subctx_info->subctxs[user_channel->subctx_id].va_space = va_space;
|
||||
channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space = gpu_va_space;
|
||||
}
|
||||
|
||||
++channel_subctx_info->total_refcount;
|
||||
@@ -2960,7 +2905,7 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
|
||||
uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
|
||||
uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;
|
||||
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
|
||||
|
||||
uvm_assert_spinlock_locked(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
@@ -2989,16 +2934,17 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
|
||||
user_channel->subctx_id,
|
||||
user_channel->tsg.id);
|
||||
|
||||
UVM_ASSERT_MSG(user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space == va_space,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space 0x%llx but got 0x%llx instead\n",
|
||||
UVM_ASSERT_MSG(user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == gpu_va_space,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: "
|
||||
"expected GPU VA space 0x%llx but got 0x%llx instead\n",
|
||||
user_channel->hw_runlist_id,
|
||||
user_channel->hw_channel_id,
|
||||
instance_ptr.address,
|
||||
uvm_aperture_string(instance_ptr.aperture),
|
||||
user_channel->subctx_id,
|
||||
user_channel->tsg.id,
|
||||
(NvU64)va_space,
|
||||
(NvU64)user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space);
|
||||
(NvU64)gpu_va_space,
|
||||
(NvU64)user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
|
||||
|
||||
UVM_ASSERT_MSG(user_channel->subctx_info->total_refcount > 0,
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: TSG refcount is 0\n",
|
||||
@@ -3011,7 +2957,7 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
|
||||
|
||||
// Decrement VA space refcount. If it gets to zero, unregister the pointer
|
||||
if (--user_channel->subctx_info->subctxs[user_channel->subctx_id].refcount == 0)
|
||||
user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space = NULL;
|
||||
user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space = NULL;
|
||||
|
||||
if (--user_channel->subctx_info->total_refcount == 0) {
|
||||
uvm_rb_tree_remove(&parent_gpu->tsg_table, &user_channel->subctx_info->node);
|
||||
@@ -3094,7 +3040,7 @@ static uvm_user_channel_t *instance_ptr_to_user_channel(uvm_parent_gpu_t *parent
|
||||
return get_user_channel(instance_node);
|
||||
}
|
||||
|
||||
static uvm_va_space_t *user_channel_and_subctx_to_va_space(uvm_user_channel_t *user_channel, NvU32 subctx_id)
|
||||
static uvm_gpu_va_space_t *user_channel_and_subctx_to_gpu_va_space(uvm_user_channel_t *user_channel, NvU32 subctx_id)
|
||||
{
|
||||
uvm_user_channel_subctx_info_t *channel_subctx_info;
|
||||
|
||||
@@ -3122,28 +3068,31 @@ static uvm_va_space_t *user_channel_and_subctx_to_va_space(uvm_user_channel_t *u
|
||||
// uncleanly and work from that subcontext continues running with work from
|
||||
// other subcontexts.
|
||||
if (channel_subctx_info->subctxs[subctx_id].refcount == 0) {
|
||||
UVM_ASSERT(channel_subctx_info->subctxs[subctx_id].va_space == NULL);
|
||||
UVM_ASSERT(channel_subctx_info->subctxs[subctx_id].gpu_va_space == NULL);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[subctx_id].va_space,
|
||||
"instance_ptr {0x%llx:%s} in TSG %u: no VA space for SubCTX %u\n",
|
||||
UVM_ASSERT_MSG(channel_subctx_info->subctxs[subctx_id].gpu_va_space,
|
||||
"instance_ptr {0x%llx:%s} in TSG %u: no GPU VA space for SubCTX %u\n",
|
||||
user_channel->instance_ptr.addr.address,
|
||||
uvm_aperture_string(user_channel->instance_ptr.addr.aperture),
|
||||
user_channel->tsg.id,
|
||||
subctx_id);
|
||||
}
|
||||
|
||||
return channel_subctx_info->subctxs[subctx_id].va_space;
|
||||
return channel_subctx_info->subctxs[subctx_id].gpu_va_space;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space)
|
||||
const uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space,
|
||||
uvm_gpu_t **out_gpu)
|
||||
{
|
||||
uvm_user_channel_t *user_channel;
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
*out_va_space = NULL;
|
||||
*out_gpu = NULL;
|
||||
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
@@ -3164,8 +3113,10 @@ NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
// We can safely access user_channel->gpu_va_space under the
|
||||
// instance_ptr_table_lock since gpu_va_space is set to NULL after this
|
||||
// function is called in uvm_user_channel_detach
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(user_channel->gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
*out_va_space = user_channel->gpu_va_space->va_space;
|
||||
gpu_va_space = user_channel->gpu_va_space;
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
*out_va_space = gpu_va_space->va_space;
|
||||
*out_gpu = gpu_va_space->gpu;
|
||||
}
|
||||
else {
|
||||
NvU32 ve_id = fault->fault_source.ve_id;
|
||||
@@ -3175,12 +3126,17 @@ NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
ve_id -= user_channel->smc_engine_ve_id_offset;
|
||||
|
||||
*out_va_space = user_channel_and_subctx_to_va_space(user_channel, ve_id);
|
||||
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, ve_id);
|
||||
|
||||
// Instance pointer is valid but the fault targets a non-existent
|
||||
// subcontext.
|
||||
if (!*out_va_space)
|
||||
if (gpu_va_space) {
|
||||
*out_va_space = gpu_va_space->va_space;
|
||||
*out_gpu = gpu_va_space->gpu;
|
||||
}
|
||||
else {
|
||||
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
}
|
||||
}
|
||||
|
||||
exit_unlock:
|
||||
@@ -3190,13 +3146,16 @@ exit_unlock:
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space)
|
||||
const uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space,
|
||||
uvm_gpu_t **out_gpu)
|
||||
{
|
||||
uvm_user_channel_t *user_channel;
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
*out_va_space = NULL;
|
||||
*out_gpu = NULL;
|
||||
UVM_ASSERT(entry->address.is_virtual);
|
||||
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
@@ -3212,13 +3171,20 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
|
||||
"Access counter packet contains SubCTX %u for channel not in subctx\n",
|
||||
entry->virtual_info.ve_id);
|
||||
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(user_channel->gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
*out_va_space = user_channel->gpu_va_space->va_space;
|
||||
gpu_va_space = user_channel->gpu_va_space;
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
*out_va_space = gpu_va_space->va_space;
|
||||
*out_gpu = gpu_va_space->gpu;
|
||||
}
|
||||
else {
|
||||
*out_va_space = user_channel_and_subctx_to_va_space(user_channel, entry->virtual_info.ve_id);
|
||||
if (!*out_va_space)
|
||||
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
|
||||
if (gpu_va_space) {
|
||||
*out_va_space = gpu_va_space->va_space;
|
||||
*out_gpu = gpu_va_space->gpu;
|
||||
}
|
||||
else {
|
||||
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
}
|
||||
}
|
||||
|
||||
exit_unlock:
|
||||
@@ -3296,7 +3262,10 @@ void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
|
||||
struct page *page,
|
||||
size_t size,
|
||||
NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -160,6 +160,10 @@ struct uvm_service_block_context_struct
|
||||
// Pages whose permissions need to be revoked from other processors
|
||||
uvm_page_mask_t revocation_mask;
|
||||
|
||||
// Temporary mask used in service_va_block_locked() in
|
||||
// uvm_gpu_access_counters.c.
|
||||
uvm_processor_mask_t update_processors;
|
||||
|
||||
struct
|
||||
{
|
||||
// Per-processor mask with the pages that will be resident after
|
||||
@@ -275,6 +279,10 @@ struct uvm_fault_service_batch_context_struct
|
||||
// pick one to be the target of the cancel sequence.
|
||||
uvm_va_space_t *fatal_va_space;
|
||||
|
||||
// TODO: Bug 3900733: refactor service_fault_batch_for_cancel() to handle
|
||||
// iterating over multiple GPU VA spaces and remove fatal_gpu.
|
||||
uvm_gpu_t *fatal_gpu;
|
||||
|
||||
bool has_throttled_faults;
|
||||
|
||||
NvU32 num_invalid_prefetch_faults;
|
||||
@@ -589,20 +597,26 @@ typedef enum
|
||||
UVM_GPU_LINK_NVLINK_2,
|
||||
UVM_GPU_LINK_NVLINK_3,
|
||||
UVM_GPU_LINK_NVLINK_4,
|
||||
UVM_GPU_LINK_NVLINK_5,
|
||||
UVM_GPU_LINK_C2C,
|
||||
UVM_GPU_LINK_MAX
|
||||
} uvm_gpu_link_type_t;
|
||||
|
||||
// UVM does not support P2P copies on pre-Pascal GPUs. Pascal+ GPUs only
|
||||
// support virtual addresses in P2P copies. Therefore, a peer identity mapping
|
||||
// needs to be created.
|
||||
// Ampere+ GPUs support physical peer copies, too, so identity mappings are not
|
||||
// needed
|
||||
typedef enum
|
||||
{
|
||||
// Peer copies can be disallowed for a variety of reasons. For example,
|
||||
// P2P transfers are disabled in pre-Pascal GPUs because there is no
|
||||
// compelling use case for direct peer migrations.
|
||||
UVM_GPU_PEER_COPY_MODE_UNSUPPORTED,
|
||||
|
||||
// Pascal+ GPUs support virtual addresses in P2P copies. Virtual peer copies
|
||||
// require the creation of peer identity mappings.
|
||||
UVM_GPU_PEER_COPY_MODE_VIRTUAL,
|
||||
|
||||
// Ampere+ GPUs support virtual and physical peer copies. Physical peer
|
||||
// copies do not depend on peer identity mappings.
|
||||
UVM_GPU_PEER_COPY_MODE_PHYSICAL,
|
||||
|
||||
UVM_GPU_PEER_COPY_MODE_COUNT
|
||||
} uvm_gpu_peer_copy_mode_t;
|
||||
|
||||
@@ -1256,11 +1270,6 @@ struct uvm_gpu_peer_struct
|
||||
// peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
|
||||
NvU8 peer_ids[2];
|
||||
|
||||
// Indirect peers are GPUs which can coherently access each others' memory
|
||||
// over NVLINK, but are routed through the CPU using the SYS aperture rather
|
||||
// than a PEER aperture
|
||||
NvU8 is_indirect_peer : 1;
|
||||
|
||||
// The link type between the peer GPUs, currently either PCIe or NVLINK.
|
||||
// This field is used to determine when this peer struct has been
|
||||
// initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
|
||||
@@ -1269,8 +1278,8 @@ struct uvm_gpu_peer_struct
|
||||
uvm_gpu_link_type_t link_type;
|
||||
|
||||
// Maximum unidirectional bandwidth between the peers in megabytes per
|
||||
// second, not taking into account the protocols' overhead. The reported
|
||||
// bandwidth for indirect peers is zero. See UvmGpuP2PCapsParams.
|
||||
// second, not taking into account the protocols' overhead.
|
||||
// See UvmGpuP2PCapsParams.
|
||||
NvU32 total_link_line_rate_mbyte_per_s;
|
||||
|
||||
// For PCIe, the number of times that this has been retained by a VA space.
|
||||
@@ -1414,19 +1423,9 @@ static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
static bool uvm_gpus_are_smc_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
{
|
||||
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
|
||||
|
||||
if (peer_caps->link_type != UVM_GPU_LINK_INVALID && peer_caps->is_indirect_peer) {
|
||||
UVM_ASSERT(gpu0->mem_info.numa.enabled);
|
||||
UVM_ASSERT(gpu1->mem_info.numa.enabled);
|
||||
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_PCIE);
|
||||
UVM_ASSERT(!uvm_gpus_are_nvswitch_connected(gpu0, gpu1));
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
return gpu0->parent == gpu1->parent;
|
||||
}
|
||||
|
||||
// Retrieve the virtual address corresponding to the given vidmem physical
|
||||
@@ -1611,16 +1610,25 @@ void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_c
|
||||
// NV_ERR_PAGE_TABLE_NOT_AVAIL Entry's instance pointer is valid but the entry
|
||||
// targets an invalid subcontext
|
||||
//
|
||||
// out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
|
||||
// is responsibile for ensuring that the returned va_space can't be destroyed,
|
||||
// so these functions should only be called from the bottom half.
|
||||
// out_va_space is valid if NV_OK is returned, otherwise it's NULL.
|
||||
// out_gpu is valid if NV_OK is returned, otherwise it's NULL.
|
||||
// The caller is responsible for ensuring that the returned va_space and gpu
|
||||
// can't be destroyed, so this function should only be called from the bottom
|
||||
// half.
|
||||
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space);
|
||||
const uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space,
|
||||
uvm_gpu_t **out_gpu);
|
||||
|
||||
// Return the GPU VA space for the given instance pointer and ve_id in the
|
||||
// access counter entry. This function can only be used for virtual address
|
||||
// entries.
|
||||
// The return values are the same as uvm_parent_gpu_fault_entry_to_va_space()
|
||||
// but for virtual access counter entries.
|
||||
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space);
|
||||
const uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space,
|
||||
uvm_gpu_t **out_gpu);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
||||
@@ -734,9 +734,18 @@ static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const voi
|
||||
return cmp_access_counter_instance_ptr(a, b);
|
||||
}
|
||||
|
||||
// Compare two GPUs
|
||||
static inline int cmp_gpu(const uvm_gpu_t *a, const uvm_gpu_t *b)
|
||||
{
|
||||
NvU32 id_a = a ? uvm_id_value(a->id) : 0;
|
||||
NvU32 id_b = b ? uvm_id_value(b->id) : 0;
|
||||
|
||||
return UVM_CMP_DEFAULT(id_a, id_b);
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to GVA access counter notification buffer
|
||||
// entries that sorts by va_space, and fault address.
|
||||
static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
|
||||
// entries that sorts by va_space, GPU ID, and fault address.
|
||||
static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, const void *_b)
|
||||
{
|
||||
const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
|
||||
const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
|
||||
@@ -747,6 +756,10 @@ static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
result = cmp_gpu((*a)->gpu, (*b)->gpu);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
|
||||
}
|
||||
|
||||
@@ -774,7 +787,7 @@ typedef enum
|
||||
NOTIFICATION_FETCH_MODE_ALL,
|
||||
} notification_fetch_mode_t;
|
||||
|
||||
static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
|
||||
static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
notification_fetch_mode_t fetch_mode)
|
||||
{
|
||||
@@ -783,12 +796,12 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
|
||||
NvU32 notification_index;
|
||||
uvm_access_counter_buffer_entry_t *notification_cache;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
NvU32 last_instance_ptr_idx = 0;
|
||||
uvm_aperture_t last_aperture = UVM_APERTURE_PEER_MAX;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
notification_cache = batch_context->notification_cache;
|
||||
|
||||
@@ -819,7 +832,7 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
|
||||
|
||||
// We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it
|
||||
// individually since entries can be written out of order
|
||||
UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
|
||||
goto done;
|
||||
@@ -829,7 +842,7 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
|
||||
smp_mb__after_atomic();
|
||||
|
||||
// Got valid bit set. Let's cache.
|
||||
gpu->parent->access_counter_buffer_hal->parse_entry(gpu->parent, get, current_entry);
|
||||
parent_gpu->access_counter_buffer_hal->parse_entry(parent_gpu, get, current_entry);
|
||||
|
||||
if (current_entry->address.is_virtual) {
|
||||
batch_context->virt.notifications[batch_context->virt.num_notifications++] = current_entry;
|
||||
@@ -845,26 +858,38 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
|
||||
}
|
||||
}
|
||||
else {
|
||||
const NvU64 translation_size = get_config_for_type(access_counters, current_entry->counter_type)->translation_size;
|
||||
NvU64 translation_size;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
translation_size = get_config_for_type(access_counters,
|
||||
current_entry->counter_type)->translation_size;
|
||||
current_entry->address.address = UVM_ALIGN_DOWN(current_entry->address.address, translation_size);
|
||||
|
||||
batch_context->phys.notifications[batch_context->phys.num_notifications++] = current_entry;
|
||||
|
||||
current_entry->physical_info.resident_id =
|
||||
uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
|
||||
current_entry->address.address));
|
||||
|
||||
if (batch_context->phys.is_single_aperture) {
|
||||
if (batch_context->phys.num_notifications == 1)
|
||||
last_aperture = current_entry->address.aperture;
|
||||
else if (current_entry->address.aperture != last_aperture)
|
||||
batch_context->phys.is_single_aperture = false;
|
||||
gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
|
||||
if (!gpu) {
|
||||
current_entry->physical_info.resident_id = UVM_ID_INVALID;
|
||||
current_entry->gpu = NULL;
|
||||
}
|
||||
else {
|
||||
current_entry->gpu = gpu;
|
||||
current_entry->physical_info.resident_id =
|
||||
uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
|
||||
current_entry->address.address));
|
||||
|
||||
if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
|
||||
else
|
||||
UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
|
||||
if (batch_context->phys.is_single_aperture) {
|
||||
if (batch_context->phys.num_notifications == 1)
|
||||
last_aperture = current_entry->address.aperture;
|
||||
else if (current_entry->address.aperture != last_aperture)
|
||||
batch_context->phys.is_single_aperture = false;
|
||||
}
|
||||
|
||||
if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
|
||||
else
|
||||
UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
|
||||
}
|
||||
}
|
||||
|
||||
++notification_index;
|
||||
@@ -874,7 +899,7 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
done:
|
||||
write_get(gpu->parent, get);
|
||||
write_get(parent_gpu, get);
|
||||
|
||||
return notification_index;
|
||||
}
|
||||
@@ -895,12 +920,16 @@ static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_
|
||||
// simply be ignored in subsequent processing.
|
||||
status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
|
||||
current_entry,
|
||||
&current_entry->virtual_info.va_space);
|
||||
if (status != NV_OK)
|
||||
&current_entry->virtual_info.va_space,
|
||||
&current_entry->gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
|
||||
UVM_ASSERT(current_entry->gpu == NULL);
|
||||
}
|
||||
}
|
||||
else {
|
||||
current_entry->virtual_info.va_space = batch_context->virt.notifications[i - 1]->virtual_info.va_space;
|
||||
current_entry->gpu = batch_context->virt.notifications[i - 1]->gpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -924,7 +953,7 @@ static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
sort(batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
sizeof(*batch_context->virt.notifications),
|
||||
cmp_sort_virt_notifications_by_va_space_address,
|
||||
cmp_sort_virt_notifications_by_va_space_gpu_address,
|
||||
NULL);
|
||||
}
|
||||
|
||||
@@ -942,13 +971,17 @@ static void preprocess_phys_notifications(uvm_access_counter_service_batch_conte
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (!gpu)
|
||||
return NV_OK;
|
||||
|
||||
if (uvm_enable_builtin_tests) {
|
||||
// TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
|
||||
// to va_space instead of broadcasting.
|
||||
@@ -964,6 +997,31 @@ static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (uvm_enable_builtin_tests) {
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
uvm_tools_record_access_counter(va_space,
|
||||
gpu->id,
|
||||
notification_start[i],
|
||||
flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
@@ -1087,12 +1145,12 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
// pages to be serviced
|
||||
if (page_count > 0) {
|
||||
uvm_processor_id_t id;
|
||||
uvm_processor_mask_t update_processors;
|
||||
uvm_processor_mask_t *update_processors = &service_context->update_processors;
|
||||
|
||||
uvm_processor_mask_and(&update_processors, &va_block->resident, &service_context->resident_processors);
|
||||
uvm_processor_mask_and(update_processors, &va_block->resident, &service_context->resident_processors);
|
||||
|
||||
// Remove pages that are already resident in the destination processors
|
||||
for_each_id_in_mask(id, &update_processors) {
|
||||
for_each_id_in_mask(id, update_processors) {
|
||||
bool migrate_pages;
|
||||
uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
|
||||
UVM_ASSERT(residency_mask);
|
||||
@@ -1169,13 +1227,13 @@ static void reverse_mappings_to_va_block_page_mask(uvm_va_block_t *va_block,
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
uvm_gpu_t *gpu = current_entry->gpu;
|
||||
size_t index;
|
||||
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
@@ -1262,8 +1320,7 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
@@ -1276,8 +1333,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_single_va_block(gpu,
|
||||
batch_context,
|
||||
status = service_phys_single_va_block(batch_context,
|
||||
current_entry,
|
||||
reverse_mappings + index,
|
||||
1,
|
||||
@@ -1326,8 +1382,7 @@ static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *revers
|
||||
// Service the given translation range. It will return the count of the reverse
|
||||
// mappings found during servicing in num_reverse_mappings, even if the function
|
||||
// doesn't return NV_OK.
|
||||
static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
uvm_gpu_t *resident_gpu,
|
||||
static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_gpu_access_counter_type_config_t *config,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
@@ -1336,6 +1391,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
size_t *num_reverse_mappings,
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
uvm_gpu_t *gpu = current_entry->gpu;
|
||||
NV_STATUS status;
|
||||
NvU32 region_start, region_end;
|
||||
|
||||
@@ -1373,16 +1429,14 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
// Service all the translations
|
||||
if (are_reverse_mappings_on_single_block(batch_context->phys.translations, *num_reverse_mappings)) {
|
||||
status = service_phys_single_va_block(gpu,
|
||||
batch_context,
|
||||
status = service_phys_single_va_block(batch_context,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
out_flags);
|
||||
}
|
||||
else {
|
||||
status = service_phys_va_blocks(gpu,
|
||||
batch_context,
|
||||
status = service_phys_va_blocks(batch_context,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
@@ -1392,14 +1446,14 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
NvU32 *out_flags)
|
||||
static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_context_t *batch_context,
|
||||
uvm_access_counter_buffer_entry_t *current_entry)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 translation_index;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_gpu_t *gpu = current_entry->gpu;
|
||||
uvm_parent_gpu_t *parent_gpu = gpu->parent;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
uvm_access_counter_type_t counter_type = current_entry->counter_type;
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
|
||||
unsigned long sub_granularity;
|
||||
@@ -1429,14 +1483,13 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
// fall outside of the allocatable address range. We just drop
|
||||
// them.
|
||||
if (address >= resident_gpu->mem_info.max_allocatable_address)
|
||||
return NV_OK;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
|
||||
size_t num_reverse_mappings;
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_notification_translation(gpu,
|
||||
resident_gpu,
|
||||
status = service_phys_notification_translation(resident_gpu,
|
||||
batch_context,
|
||||
config,
|
||||
current_entry,
|
||||
@@ -1457,37 +1510,32 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
|
||||
|
||||
out:
|
||||
notify_tools_broadcast_and_process_flags(parent_gpu, &current_entry, 1, flags);
|
||||
return status;
|
||||
}
|
||||
|
||||
// TODO: Bug 2018899: Add statistics for dropped access counter notifications
|
||||
static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_phys_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_can_use_physical_addresses);
|
||||
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
|
||||
|
||||
preprocess_phys_notifications(batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU32 flags = 0;
|
||||
|
||||
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
|
||||
continue;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
|
||||
|
||||
notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);
|
||||
|
||||
status = service_phys_notification(batch_context, current_entry);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
@@ -1624,16 +1672,14 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
|
||||
expand_notification_block(gpu_va_space,
|
||||
va_block,
|
||||
batch_context->block_service_context.block_context,
|
||||
accessed_pages,
|
||||
current_entry);
|
||||
}
|
||||
else {
|
||||
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address > va_block->end)
|
||||
break;
|
||||
}
|
||||
|
||||
expand_notification_block(gpu_va_space,
|
||||
va_block,
|
||||
batch_context->block_service_context.block_context,
|
||||
accessed_pages,
|
||||
current_entry);
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
@@ -1648,7 +1694,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
if (status == NV_OK)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
|
||||
flags_status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], *out_index - index, flags);
|
||||
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
@@ -1687,7 +1733,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
if (!vma) {
|
||||
// Clear the notification entry to continue receiving access counter
|
||||
// notifications when a new VMA is allocated in this range.
|
||||
status = notify_tools_and_process_flags(gpu, &notifications[index], 1, flags);
|
||||
status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
return status;
|
||||
}
|
||||
@@ -1701,10 +1747,10 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address < end))
|
||||
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
|
||||
else
|
||||
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address >= end)
|
||||
break;
|
||||
|
||||
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
@@ -1719,7 +1765,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
if (status != NV_OK)
|
||||
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
|
||||
flags_status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], *out_index - index, flags);
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
@@ -1771,7 +1817,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
status = notify_tools_and_process_flags(va_space, gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
*out_index = index + 1;
|
||||
}
|
||||
}
|
||||
@@ -1801,7 +1847,11 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
|
||||
// Clobber status to continue processing the rest of the notifications
|
||||
// in the batch.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu_va_space->gpu,
|
||||
batch_context->virt.notifications,
|
||||
1,
|
||||
flags);
|
||||
|
||||
*out_index = index + 1;
|
||||
}
|
||||
@@ -1809,7 +1859,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i = 0;
|
||||
@@ -1817,18 +1867,19 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm = NULL;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_va_space_t *prev_va_space = NULL;
|
||||
uvm_gpu_t *prev_gpu = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
|
||||
// TODO: Bug 4299018 : Add support for virtual access counter migrations on
|
||||
// 4K page sizes.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
|
||||
return notify_tools_and_process_flags(gpu,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
0);
|
||||
return notify_tools_broadcast_and_process_flags(parent_gpu,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
0);
|
||||
}
|
||||
|
||||
preprocess_virt_notifications(gpu->parent, batch_context);
|
||||
preprocess_virt_notifications(parent_gpu, batch_context);
|
||||
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
|
||||
@@ -1842,25 +1893,38 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_va_space_mm_release_unlock(prev_va_space, mm);
|
||||
|
||||
mm = NULL;
|
||||
gpu_va_space = NULL;
|
||||
prev_gpu = NULL;
|
||||
}
|
||||
|
||||
// Acquire locks for the new va_space.
|
||||
if (va_space) {
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
}
|
||||
|
||||
prev_va_space = va_space;
|
||||
}
|
||||
|
||||
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
|
||||
if (va_space) {
|
||||
if (prev_gpu != current_entry->gpu) {
|
||||
prev_gpu = current_entry->gpu;
|
||||
gpu_va_space = uvm_gpu_va_space_get(va_space, current_entry->gpu);
|
||||
}
|
||||
|
||||
if (gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
current_entry->gpu,
|
||||
&batch_context->virt.notifications[i],
|
||||
1,
|
||||
0);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
status = notify_tools_broadcast_and_process_flags(parent_gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
i++;
|
||||
}
|
||||
|
||||
@@ -1876,19 +1940,18 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
|
||||
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &gpu->parent->access_counter_buffer_info.batch_service_context;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &parent_gpu->access_counter_buffer_info.batch_service_context;
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count > 0)
|
||||
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count > 0)
|
||||
return;
|
||||
|
||||
while (1) {
|
||||
batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(gpu,
|
||||
batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(parent_gpu,
|
||||
batch_context,
|
||||
NOTIFICATION_FETCH_MODE_BATCH_READY);
|
||||
if (batch_context->num_cached_notifications == 0)
|
||||
@@ -1897,13 +1960,13 @@ void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
|
||||
++batch_context->batch_id;
|
||||
|
||||
if (batch_context->virt.num_notifications) {
|
||||
status = service_virt_notifications(gpu, batch_context);
|
||||
status = service_virt_notifications(parent_gpu, batch_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
if (batch_context->phys.num_notifications) {
|
||||
status = service_phys_notifications(gpu, batch_context);
|
||||
status = service_phys_notifications(parent_gpu, batch_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
@@ -1912,7 +1975,7 @@ void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
|
||||
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
|
||||
@@ -479,17 +479,14 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// When SMC is enabled, there's no longer a 1:1 relationship between the
|
||||
// parent and the partitions. But because all relevant interrupt paths
|
||||
// are shared, as is the fault reporting logic, it's sufficient here
|
||||
// to proceed with any valid uvm_gpu_t, even if the corresponding partition
|
||||
// didn't cause all, or even any of the interrupts.
|
||||
// The bottom half handlers will later find the appropriate partitions by
|
||||
// attributing the notifications to VA spaces as necessary.
|
||||
// parent and the partitions. It's sufficient to return any valid uvm_gpu_t
|
||||
// since the purpose is to have a channel and push buffer for operations
|
||||
// that affect the whole parent GPU.
|
||||
if (parent_gpu->smc.enabled) {
|
||||
NvU32 sub_processor_index;
|
||||
|
||||
@@ -518,13 +515,8 @@ static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
static void replayable_faults_isr_bottom_half(void *args)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
|
||||
uvm_gpu_t *gpu;
|
||||
unsigned int cpu;
|
||||
|
||||
gpu = find_first_valid_gpu(parent_gpu);
|
||||
if (gpu == NULL)
|
||||
goto put_kref;
|
||||
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
|
||||
// Record the lock ownership
|
||||
@@ -545,11 +537,10 @@ static void replayable_faults_isr_bottom_half(void *args)
|
||||
++parent_gpu->isr.replayable_faults.stats.cpu_exec_count[cpu];
|
||||
put_cpu();
|
||||
|
||||
uvm_gpu_service_replayable_faults(gpu);
|
||||
uvm_parent_gpu_service_replayable_faults(parent_gpu);
|
||||
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
// It is OK to drop a reference on the parent GPU if a bottom half has
|
||||
// been retriggered within uvm_parent_gpu_replayable_faults_isr_unlock,
|
||||
// because the rescheduling added an additional reference.
|
||||
@@ -564,13 +555,8 @@ static void replayable_faults_isr_bottom_half_entry(void *args)
|
||||
static void non_replayable_faults_isr_bottom_half(void *args)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
|
||||
uvm_gpu_t *gpu;
|
||||
unsigned int cpu;
|
||||
|
||||
gpu = find_first_valid_gpu(parent_gpu);
|
||||
if (gpu == NULL)
|
||||
goto put_kref;
|
||||
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
|
||||
@@ -584,11 +570,10 @@ static void non_replayable_faults_isr_bottom_half(void *args)
|
||||
++parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count[cpu];
|
||||
put_cpu();
|
||||
|
||||
uvm_gpu_service_non_replayable_fault_buffer(gpu);
|
||||
uvm_parent_gpu_service_non_replayable_fault_buffer(parent_gpu);
|
||||
|
||||
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
}
|
||||
|
||||
@@ -600,13 +585,8 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
|
||||
static void access_counters_isr_bottom_half(void *args)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
|
||||
uvm_gpu_t *gpu;
|
||||
unsigned int cpu;
|
||||
|
||||
gpu = find_first_valid_gpu(parent_gpu);
|
||||
if (gpu == NULL)
|
||||
goto put_kref;
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
|
||||
@@ -620,11 +600,10 @@ static void access_counters_isr_bottom_half(void *args)
|
||||
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
|
||||
put_cpu();
|
||||
|
||||
uvm_gpu_service_access_counters(gpu);
|
||||
uvm_parent_gpu_service_access_counters(parent_gpu);
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -193,4 +193,10 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
|
||||
// are registered. This should only be called from bottom halves or if the
|
||||
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
|
||||
//
|
||||
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
#endif // __UVM_GPU_ISR_H__
|
||||
|
||||
@@ -212,6 +212,7 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
|
||||
// Make sure that all fields in the entry are properly initialized
|
||||
fault_entry->va_space = NULL;
|
||||
fault_entry->gpu = NULL;
|
||||
fault_entry->is_fatal = (fault_entry->fault_type >= UVM_FAULT_TYPE_FATAL);
|
||||
fault_entry->filtered = false;
|
||||
|
||||
@@ -235,7 +236,7 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
|
||||
static bool use_clear_faulted_channel_sw_method(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// If true, UVM uses a SW method to request RM to do the clearing on its
|
||||
// behalf.
|
||||
@@ -243,7 +244,7 @@ static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
|
||||
|
||||
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||||
// registers used to clear the faulted bit.
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov(gpu->parent))
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov(parent_gpu))
|
||||
use_sw_method = true;
|
||||
|
||||
// In Confidential Computing access to the privileged registers is blocked,
|
||||
@@ -253,17 +254,17 @@ static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
|
||||
use_sw_method = true;
|
||||
|
||||
if (use_sw_method)
|
||||
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
|
||||
UVM_ASSERT(parent_gpu->has_clear_faulted_channel_sw_method);
|
||||
|
||||
return use_sw_method;
|
||||
}
|
||||
|
||||
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
|
||||
uvm_user_channel_t *user_channel,
|
||||
static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
NvU32 batch_id,
|
||||
uvm_tracker_t *tracker)
|
||||
{
|
||||
uvm_gpu_t *gpu = user_channel->gpu;
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
@@ -283,7 +284,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
if (use_clear_faulted_channel_sw_method(gpu))
|
||||
if (use_clear_faulted_channel_sw_method(gpu->parent))
|
||||
gpu->parent->host_hal->clear_faulted_channel_sw_method(&push, user_channel, fault_entry);
|
||||
else
|
||||
gpu->parent->host_hal->clear_faulted_channel_method(&push, user_channel, fault_entry);
|
||||
@@ -305,12 +306,12 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS clear_faulted_register_on_gpu(uvm_gpu_t *gpu,
|
||||
uvm_user_channel_t *user_channel,
|
||||
static NV_STATUS clear_faulted_register_on_gpu(uvm_user_channel_t *user_channel,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
NvU32 batch_id,
|
||||
uvm_tracker_t *tracker)
|
||||
{
|
||||
uvm_gpu_t *gpu = user_channel->gpu;
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
|
||||
@@ -328,25 +329,26 @@ static NV_STATUS clear_faulted_register_on_gpu(uvm_gpu_t *gpu,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS clear_faulted_on_gpu(uvm_gpu_t *gpu,
|
||||
uvm_user_channel_t *user_channel,
|
||||
static NV_STATUS clear_faulted_on_gpu(uvm_user_channel_t *user_channel,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
NvU32 batch_id,
|
||||
uvm_tracker_t *tracker)
|
||||
{
|
||||
if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu))
|
||||
return clear_faulted_method_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
|
||||
uvm_gpu_t *gpu = user_channel->gpu;
|
||||
|
||||
return clear_faulted_register_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
|
||||
if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu->parent))
|
||||
return clear_faulted_method_on_gpu(user_channel, fault_entry, batch_id, tracker);
|
||||
|
||||
return clear_faulted_register_on_gpu(user_channel, fault_entry, batch_id, tracker);
|
||||
}
|
||||
|
||||
static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const bool hmm_migratable)
|
||||
{
|
||||
uvm_gpu_t *gpu = fault_entry->gpu;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_perf_thrashing_hint_t thrashing_hint;
|
||||
@@ -441,13 +443,13 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
const bool hmm_migratable)
|
||||
{
|
||||
NV_STATUS status, tracker_status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
uvm_gpu_t *gpu = fault_entry->gpu;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
@@ -459,8 +461,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
|
||||
service_managed_fault_in_block_locked(gpu,
|
||||
va_block,
|
||||
service_managed_fault_in_block_locked(va_block,
|
||||
&va_block_retry,
|
||||
fault_entry,
|
||||
service_context,
|
||||
@@ -502,16 +503,14 @@ static void kill_channel_delayed_entry(void *user_channel)
|
||||
UVM_ENTRY_VOID(kill_channel_delayed(user_channel));
|
||||
}
|
||||
|
||||
static void schedule_kill_channel(uvm_gpu_t *gpu,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_user_channel_t *user_channel)
|
||||
static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_va_space_t *va_space = fault_entry->va_space;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
|
||||
(fault_entry->non_replayable.buffer_index * gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
|
||||
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
|
||||
|
||||
UVM_ASSERT(gpu);
|
||||
UVM_ASSERT(va_space);
|
||||
UVM_ASSERT(user_channel);
|
||||
|
||||
@@ -522,7 +521,7 @@ static void schedule_kill_channel(uvm_gpu_t *gpu,
|
||||
user_channel->kill_channel.va_space = va_space;
|
||||
|
||||
// Save the packet to be handled by RM in the channel structure
|
||||
memcpy(user_channel->kill_channel.fault_packet, packet, gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
|
||||
memcpy(user_channel->kill_channel.fault_packet, packet, parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
|
||||
|
||||
// Retain the channel here so it is not prematurely destroyed. It will be
|
||||
// released after forwarding the fault to RM in kill_channel_delayed.
|
||||
@@ -533,7 +532,7 @@ static void schedule_kill_channel(uvm_gpu_t *gpu,
|
||||
kill_channel_delayed_entry,
|
||||
user_channel);
|
||||
|
||||
nv_kthread_q_schedule_q_item(&gpu->parent->isr.kill_channel_q,
|
||||
nv_kthread_q_schedule_q_item(&parent_gpu->isr.kill_channel_q,
|
||||
&user_channel->kill_channel.kill_channel_q_item);
|
||||
}
|
||||
|
||||
@@ -550,6 +549,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
NV_STATUS lookup_status)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
|
||||
@@ -557,9 +557,11 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
UVM_ASSERT(fault_entry->va_space == va_space);
|
||||
UVM_ASSERT(fault_entry->gpu == gpu);
|
||||
|
||||
// Avoid dropping fault events when the VA block is not found or cannot be created
|
||||
uvm_perf_event_notify_gpu_fault(&fault_entry->va_space->perf_events,
|
||||
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
|
||||
NULL,
|
||||
gpu->id,
|
||||
UVM_ID_INVALID,
|
||||
@@ -584,11 +586,11 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);
|
||||
va_range_next = uvm_va_space_iter_first(va_space, fault_entry->fault_address, ~0ULL);
|
||||
|
||||
// The VA isn't managed. See if ATS knows about it.
|
||||
vma = find_vma_intersection(mm, fault_address, fault_address + 1);
|
||||
if (!vma || uvm_ats_check_in_gmmu_region(gpu_va_space->va_space, fault_address, va_range_next)) {
|
||||
if (!vma || uvm_ats_check_in_gmmu_region(va_space, fault_address, va_range_next)) {
|
||||
|
||||
// Do not return error due to logical errors in the application
|
||||
status = NV_OK;
|
||||
@@ -631,19 +633,24 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry, const bool hmm_migratable)
|
||||
static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
const bool hmm_migratable)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_user_channel_t *user_channel;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_va_space_t *va_space;
|
||||
struct mm_struct *mm;
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
|
||||
|
||||
status = uvm_parent_gpu_fault_entry_to_va_space(gpu->parent, fault_entry, &va_space);
|
||||
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
|
||||
fault_entry,
|
||||
&va_space,
|
||||
&gpu);
|
||||
if (status != NV_OK) {
|
||||
// The VA space lookup will fail if we're running concurrently with
|
||||
// removal of the channel from the VA space (channel unregister, GPU VA
|
||||
@@ -657,10 +664,12 @@ static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fa
|
||||
// replayable faults only use the address space of their channel.
|
||||
UVM_ASSERT(status == NV_ERR_INVALID_CHANNEL);
|
||||
UVM_ASSERT(!va_space);
|
||||
UVM_ASSERT(!gpu);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
UVM_ASSERT(gpu);
|
||||
|
||||
// If an mm is registered with the VA space, we have to retain it
|
||||
// in order to lock it before locking the VA space. It is guaranteed
|
||||
@@ -671,8 +680,7 @@ static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fa
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
if (!gpu_va_space) {
|
||||
// The va_space might have gone away. See the comment above.
|
||||
status = NV_OK;
|
||||
@@ -680,6 +688,7 @@ static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fa
|
||||
}
|
||||
|
||||
fault_entry->va_space = va_space;
|
||||
fault_entry->gpu = gpu;
|
||||
|
||||
user_channel = uvm_gpu_va_space_get_user_channel(gpu_va_space, fault_entry->instance_ptr);
|
||||
if (!user_channel) {
|
||||
@@ -692,26 +701,25 @@ static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fa
|
||||
|
||||
if (!fault_entry->is_fatal) {
|
||||
if (mm) {
|
||||
status = uvm_va_block_find_create(fault_entry->va_space,
|
||||
status = uvm_va_block_find_create(va_space,
|
||||
fault_entry->fault_address,
|
||||
&va_block_context->hmm.vma,
|
||||
&va_block);
|
||||
}
|
||||
else {
|
||||
status = uvm_va_block_find_create_managed(fault_entry->va_space,
|
||||
status = uvm_va_block_find_create_managed(va_space,
|
||||
fault_entry->fault_address,
|
||||
&va_block);
|
||||
}
|
||||
if (status == NV_OK)
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry, hmm_migratable);
|
||||
status = service_managed_fault_in_block(va_block, fault_entry, hmm_migratable);
|
||||
else
|
||||
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
|
||||
|
||||
// We are done, we clear the faulted bit on the channel, so it can be
|
||||
// re-scheduled again
|
||||
if (status == NV_OK && !fault_entry->is_fatal) {
|
||||
status = clear_faulted_on_gpu(gpu,
|
||||
user_channel,
|
||||
status = clear_faulted_on_gpu(user_channel,
|
||||
fault_entry,
|
||||
non_replayable_faults->batch_id,
|
||||
&non_replayable_faults->fault_service_tracker);
|
||||
@@ -720,13 +728,13 @@ static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fa
|
||||
}
|
||||
|
||||
if (fault_entry->is_fatal)
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, fault_entry, fault_entry->fatal_reason);
|
||||
|
||||
if (fault_entry->is_fatal ||
|
||||
(status != NV_OK &&
|
||||
status != NV_WARN_MORE_PROCESSING_REQUIRED &&
|
||||
status != NV_WARN_MISMATCHED_TARGET))
|
||||
schedule_kill_channel(gpu, fault_entry, user_channel);
|
||||
schedule_kill_channel(fault_entry, user_channel);
|
||||
|
||||
exit_no_channel:
|
||||
uvm_va_space_up_read(va_space);
|
||||
@@ -735,22 +743,23 @@ exit_no_channel:
|
||||
if (status != NV_OK &&
|
||||
status != NV_WARN_MORE_PROCESSING_REQUIRED &&
|
||||
status != NV_WARN_MISMATCHED_TARGET)
|
||||
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
|
||||
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
|
||||
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
uvm_service_block_context_t *service_context =
|
||||
&gpu->parent->fault_buffer_info.non_replayable.block_service_context;
|
||||
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
|
||||
NV_STATUS status;
|
||||
bool hmm_migratable = true;
|
||||
|
||||
service_context->num_retries = 0;
|
||||
|
||||
do {
|
||||
status = service_fault_once(gpu, fault_entry, hmm_migratable);
|
||||
status = service_fault_once(parent_gpu, fault_entry, hmm_migratable);
|
||||
if (status == NV_WARN_MISMATCHED_TARGET) {
|
||||
hmm_migratable = false;
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
@@ -760,7 +769,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 cached_faults;
|
||||
|
||||
@@ -772,7 +781,7 @@ void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
NV_STATUS status;
|
||||
NvU32 i;
|
||||
|
||||
status = fetch_non_replayable_fault_buffer_entries(gpu->parent, &cached_faults);
|
||||
status = fetch_non_replayable_fault_buffer_entries(parent_gpu, &cached_faults);
|
||||
if (status != NV_OK)
|
||||
return;
|
||||
|
||||
@@ -780,7 +789,7 @@ void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
// non-replayable faults since getting multiple faults on the same
|
||||
// memory region is not very likely
|
||||
for (i = 0; i < cached_faults; ++i) {
|
||||
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
|
||||
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
|
||||
if (status != NV_OK)
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
|
||||
void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -73,5 +73,5 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Service pending replayable faults on the given GPU. This function must
|
||||
// only be called from the ISR bottom half
|
||||
void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);
|
||||
void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
#endif // __UVM_GPU_PAGE_FAULT_H__
|
||||
|
||||
@@ -60,6 +60,17 @@ struct uvm_gpu_semaphore_pool_page_struct
|
||||
// Allocation backing the page
|
||||
uvm_rm_mem_t *memory;
|
||||
|
||||
struct {
|
||||
// Unprotected sysmem storing encrypted value of semaphores
|
||||
uvm_rm_mem_t *encrypted_payload_memory;
|
||||
|
||||
// Unprotected sysmem storing encryption auth tags
|
||||
uvm_rm_mem_t *auth_tag_memory;
|
||||
|
||||
// Unprotected sysmem storing plain text notifier values
|
||||
uvm_rm_mem_t *notifier_memory;
|
||||
} conf_computing;
|
||||
|
||||
// Pool the page is part of
|
||||
uvm_gpu_semaphore_pool_t *pool;
|
||||
|
||||
@@ -80,26 +91,6 @@ static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
|
||||
return gpu_semaphore_pool_is_secure(semaphore->page->pool);
|
||||
}
|
||||
|
||||
// Return the slot index of a semaphore within its pool page.
//
// Secure semaphores carry the index explicitly in conf_computing.index. For
// all other semaphores the index is derived from the byte offset of the
// payload pointer relative to the CPU mapping of the page's backing memory,
// divided by UVM_SEMAPHORE_SIZE.
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU32 offset;
|
||||
NvU32 index;
|
||||
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
return semaphore->conf_computing.index;
|
||||
|
||||
UVM_ASSERT(semaphore->payload != NULL);
|
||||
UVM_ASSERT(semaphore->page != NULL);
|
||||
|
||||
// Byte distance from the start of the page's CPU mapping to the payload
offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
|
||||
UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);
|
||||
|
||||
index = offset / UVM_SEMAPHORE_SIZE;
|
||||
UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
// Use canary values on debug builds to catch semaphore use-after-free. We can
|
||||
// catch release-after-free by simply setting the payload to a known value at
|
||||
// free then checking it on alloc or pool free, but catching acquire-after-free
|
||||
@@ -150,34 +141,83 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
|
||||
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
|
||||
}
|
||||
|
||||
// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
|
||||
// the owning GPU, as no other processor has access to it.
|
||||
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
|
||||
uvm_gpu_semaphore_pool_page_t *pool_page,
|
||||
uvm_rm_mem_type_t memory_type)
|
||||
// Free every buffer backing a semaphore pool page and reset the corresponding
// pointers to NULL.
//
// The main semaphore memory is always freed. The three conf_computing buffers
// (encrypted payload, auth tag, notifier) are freed only when the page's pool
// is secure; for a non-secure pool they are asserted to be unset.
//
// This is also the error path of pool_page_alloc_buffers(), so it can run on a
// partially allocated page.
// NOTE(review): that relies on uvm_rm_mem_free() accepting NULL - confirm.
static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
|
||||
{
|
||||
uvm_rm_mem_free(page->memory);
|
||||
page->memory = NULL;
|
||||
|
||||
if (gpu_semaphore_pool_is_secure(page->pool)) {
|
||||
uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
|
||||
uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
|
||||
uvm_rm_mem_free(page->conf_computing.notifier_memory);
|
||||
|
||||
page->conf_computing.encrypted_payload_memory = NULL;
|
||||
page->conf_computing.auth_tag_memory = NULL;
|
||||
page->conf_computing.notifier_memory = NULL;
|
||||
}
|
||||
else {
|
||||
// Non-secure pools never allocate the conf_computing buffers
UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
|
||||
UVM_ASSERT(!page->conf_computing.auth_tag_memory);
|
||||
UVM_ASSERT(!page->conf_computing.notifier_memory);
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_semaphore_pool_t *pool = page->pool;
|
||||
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
|
||||
size_t align = 0;
|
||||
bool map_all = true;
|
||||
align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
|
||||
map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;
|
||||
|
||||
UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
|
||||
status = uvm_rm_mem_alloc(pool->gpu,
|
||||
memory_type,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&pool_page->memory);
|
||||
if (map_all)
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
|
||||
else
|
||||
status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
goto error;
|
||||
|
||||
if (!gpu_semaphore_pool_is_secure(pool))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&page->conf_computing.encrypted_payload_memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
|
||||
&page->conf_computing.auth_tag_memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
|
||||
0,
|
||||
&page->conf_computing.notifier_memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
return NV_OK;
|
||||
error:
|
||||
pool_page_free_buffers(page);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_semaphore_pool_page_t *pool_page;
|
||||
NvU32 *payloads;
|
||||
size_t i;
|
||||
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
|
||||
|
||||
uvm_assert_mutex_locked(&pool->mutex);
|
||||
|
||||
@@ -188,24 +228,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
|
||||
pool_page->pool = pool;
|
||||
|
||||
// Whenever the Confidential Computing feature is enabled, engines can
|
||||
// access semaphores only in the CPR of vidmem. Mapping to other GPUs is
|
||||
// also disabled.
|
||||
if (gpu_semaphore_pool_is_secure(pool)) {
|
||||
status = pool_alloc_secure_page(pool, pool_page, memory_type);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
|
||||
memory_type,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
0,
|
||||
&pool_page->memory);
|
||||
status = pool_page_alloc_buffers(pool_page);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Verify the GPU can access the semaphore pool.
|
||||
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
|
||||
@@ -217,7 +242,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
|
||||
|
||||
if (semaphore_uses_canary(pool)) {
|
||||
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
|
||||
size_t i;
|
||||
NvU32 *payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
|
||||
|
||||
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
|
||||
payloads[i] = make_canary(0);
|
||||
}
|
||||
@@ -253,7 +280,7 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
|
||||
|
||||
pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
|
||||
list_del(&page->all_pages_node);
|
||||
uvm_rm_mem_free(page->memory);
|
||||
pool_page_free_buffers(page);
|
||||
uvm_kvfree(page);
|
||||
}
|
||||
|
||||
@@ -273,19 +300,22 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
|
||||
goto done;
|
||||
|
||||
list_for_each_entry(page, &pool->pages, all_pages_node) {
|
||||
NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
const NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
|
||||
UVM_ASSERT(semaphore_index <= UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
|
||||
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
|
||||
continue;
|
||||
|
||||
if (gpu_semaphore_pool_is_secure(pool)) {
|
||||
semaphore->conf_computing.index = semaphore_index;
|
||||
}
|
||||
else {
|
||||
semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
|
||||
semaphore_index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
semaphore->page = page;
|
||||
semaphore->index = semaphore_index;
|
||||
|
||||
if (gpu_semaphore_pool_is_secure(pool)) {
|
||||
|
||||
// Reset the notifier to prevent detection of false attack when
|
||||
// checking for updated value
|
||||
*uvm_gpu_semaphore_get_notifier_cpu_va(semaphore) = semaphore->conf_computing.last_observed_notifier;
|
||||
}
|
||||
|
||||
if (semaphore_uses_canary(pool))
|
||||
UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
|
||||
@@ -311,7 +341,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
uvm_gpu_semaphore_pool_page_t *page;
|
||||
uvm_gpu_semaphore_pool_t *pool;
|
||||
NvU32 index;
|
||||
|
||||
UVM_ASSERT(semaphore);
|
||||
|
||||
@@ -323,7 +352,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
|
||||
return;
|
||||
|
||||
pool = page->pool;
|
||||
index = get_index(semaphore);
|
||||
|
||||
// Write a known value lower than the current payload in an attempt to catch
|
||||
// release-after-free and acquire-after-free.
|
||||
@@ -333,10 +361,9 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
|
||||
semaphore->page = NULL;
|
||||
semaphore->payload = NULL;
|
||||
|
||||
++pool->free_semaphores_count;
|
||||
__set_bit(index, page->free_semaphores);
|
||||
__set_bit(semaphore->index, page->free_semaphores);
|
||||
|
||||
uvm_mutex_unlock(&pool->mutex);
|
||||
}
|
||||
@@ -449,18 +476,70 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
|
||||
|
||||
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
|
||||
{
|
||||
NvU32 index = get_index(semaphore);
|
||||
NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;
|
||||
|
||||
return base_va + UVM_SEMAPHORE_SIZE * index;
|
||||
return base_va + semaphore->index * UVM_SEMAPHORE_SIZE;
|
||||
}
|
||||
|
||||
// Return the CPU address through which the semaphore payload is read and
// written.
//
// For a secure semaphore this is the address of the cached payload stored in
// the semaphore structure itself. Otherwise it is the semaphore's slot
// (semaphore->index * UVM_SEMAPHORE_SIZE bytes) inside the CPU mapping of the
// page's backing memory.
NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *base_va;
|
||||
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
return &semaphore->conf_computing.cached_payload;
|
||||
|
||||
base_va = uvm_rm_mem_get_cpu_va(semaphore->page->memory);
|
||||
return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
// Return the CPU address of this semaphore's slot in the page's encrypted
// payload buffer. Slots are UVM_SEMAPHORE_SIZE bytes apart and selected by
// semaphore->index.
//
// The encrypted payload buffer is only allocated for secure pools (see
// pool_page_alloc_buffers()), so this is only meaningful for secure
// semaphores.
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
|
||||
|
||||
return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
// Return the GPU address of this semaphore's slot in the page's encrypted
// payload buffer.
//
// The base is the buffer's UVM VA on the GPU that owns the pool; the slot
// offset is semaphore->index * UVM_SEMAPHORE_SIZE. The result is wrapped as an
// unprotected virtual address. Only meaningful for secure semaphores, since
// the buffer is not allocated for other pools.
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
// Return the CPU address of this semaphore's entry in the page's notifier
// buffer. Entries are sizeof(NvU32) bytes each and selected by
// semaphore->index.
//
// The notifier buffer is only allocated for secure pools (see
// pool_page_alloc_buffers()), so this is only meaningful for secure
// semaphores.
NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *notifier_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
|
||||
|
||||
return (NvU32*)(notifier_base_va + semaphore->index * sizeof(NvU32));
|
||||
}
|
||||
|
||||
// Return the GPU address of this semaphore's entry in the page's notifier
// buffer.
//
// The base is the buffer's UVM VA on the GPU that owns the pool; the entry
// offset is semaphore->index * sizeof(NvU32). The result is wrapped as an
// unprotected virtual address. Only meaningful for secure semaphores, since
// the buffer is not allocated for other pools.
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(notifier_base_va + semaphore->index * sizeof(NvU32));
|
||||
}
|
||||
|
||||
// Return the CPU address of this semaphore's entry in the page's auth tag
// buffer. Entries are UVM_CONF_COMPUTING_AUTH_TAG_SIZE bytes each and selected
// by semaphore->index.
//
// The auth tag buffer is only allocated for secure pools (see
// pool_page_alloc_buffers()), so this is only meaningful for secure
// semaphores.
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);
|
||||
|
||||
return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
}
|
||||
|
||||
// Return the GPU address of this semaphore's entry in the page's auth tag
// buffer.
//
// The base is the buffer's UVM VA on the GPU that owns the pool; the entry
// offset is semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE. The result is
// wrapped as an unprotected virtual address. Only meaningful for secure
// semaphores, since the buffer is not allocated for other pools.
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
}
|
||||
|
||||
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);
|
||||
|
||||
return UVM_GPU_READ_ONCE(*semaphore->payload);
|
||||
return UVM_GPU_READ_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore));
|
||||
}
|
||||
|
||||
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
|
||||
@@ -477,10 +556,7 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
|
||||
// the GPU correctly even on non-SMP).
|
||||
mb();
|
||||
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
|
||||
else
|
||||
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
|
||||
UVM_GPU_WRITE_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore), payload);
|
||||
}
|
||||
|
||||
// This function is intended to catch channels which have been left dangling in
|
||||
@@ -507,7 +583,7 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
|
||||
return true;
|
||||
}
|
||||
|
||||
bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
|
||||
static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
|
||||
{
|
||||
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
|
||||
|
||||
@@ -571,9 +647,7 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
|
||||
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
|
||||
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
|
||||
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
|
||||
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
|
||||
NvU32 *gpu_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
@@ -596,8 +670,8 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
|
||||
smp_mb__after_atomic();
|
||||
|
||||
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
|
||||
memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
|
||||
local_payload = UVM_READ_ONCE(*payload_cpu_addr);
|
||||
memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
|
||||
local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
|
||||
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
|
||||
|
||||
// Make sure the second read of notifier happens after
|
||||
@@ -650,7 +724,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
else
|
||||
uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);
|
||||
|
||||
if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
|
||||
if (gpu_semaphore_is_secure(&tracking_semaphore->semaphore)) {
|
||||
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
|
||||
// mechanism to all semaphore
|
||||
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
|
||||
@@ -690,7 +764,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
|
||||
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
|
||||
uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
|
||||
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
|
||||
(NvU64)(uintptr_t)uvm_gpu_semaphore_get_cpu_va(&tracking_semaphore->semaphore),
|
||||
old_value, new_value);
|
||||
|
||||
// Use an atomic write even though the lock is held so that the value can
|
||||
|
||||
@@ -45,15 +45,13 @@ struct uvm_gpu_semaphore_struct
|
||||
// The semaphore pool page the semaphore came from
|
||||
uvm_gpu_semaphore_pool_page_t *page;
|
||||
|
||||
// Pointer to the memory location
|
||||
NvU32 *payload;
|
||||
// Index of the semaphore in semaphore page
|
||||
NvU16 index;
|
||||
|
||||
struct {
|
||||
NvU16 index;
|
||||
NvU32 cached_payload;
|
||||
uvm_rm_mem_t *encrypted_payload;
|
||||
uvm_rm_mem_t *notifier;
|
||||
uvm_rm_mem_t *auth_tag;
|
||||
UvmCslIv *ivs;
|
||||
NvU32 cached_payload;
|
||||
|
||||
NvU32 last_pushed_notifier;
|
||||
NvU32 last_observed_notifier;
|
||||
} conf_computing;
|
||||
@@ -151,6 +149,17 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
|
||||
|
||||
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);
|
||||
|
||||
NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
||||
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
||||
NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
||||
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
||||
// Read the 32-bit payload of the semaphore
|
||||
// Notably doesn't provide any memory ordering guarantees and needs to be used with
|
||||
// care. For an example of what needs to be considered see
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -44,6 +44,8 @@
|
||||
#include "clc7b5.h"
|
||||
#include "clc86f.h"
|
||||
#include "clc8b5.h"
|
||||
#include "clc96f.h"
|
||||
#include "clc9b5.h"
|
||||
|
||||
static int uvm_downgrade_force_membar_sys = 1;
|
||||
module_param(uvm_downgrade_force_membar_sys, uint, 0644);
|
||||
@@ -164,6 +166,11 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.decrypt = uvm_hal_hopper_ce_decrypt,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = BLACKWELL_DMA_COPY_A,
|
||||
.parent_id = HOPPER_DMA_COPY_A,
|
||||
.u.ce_ops = {},
|
||||
},
|
||||
};
|
||||
|
||||
// Table for GPFIFO functions. Same idea as the copy engine table.
|
||||
@@ -251,6 +258,9 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.semaphore_release = uvm_hal_turing_host_semaphore_release,
|
||||
.clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
|
||||
.set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
|
||||
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
|
||||
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
|
||||
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -283,6 +293,15 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.set_gpfifo_pushbuffer_segment_base = uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = BLACKWELL_CHANNEL_GPFIFO_A,
|
||||
.parent_id = HOPPER_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {
|
||||
.tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all,
|
||||
.tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va,
|
||||
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t arch_table[] =
|
||||
@@ -294,7 +313,6 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_maxwell,
|
||||
.enable_prefetch_faults = uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported,
|
||||
.disable_prefetch_faults = uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported,
|
||||
.mmu_engine_id_to_type = uvm_hal_maxwell_mmu_engine_id_to_type_unsupported,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported,
|
||||
}
|
||||
},
|
||||
@@ -320,7 +338,6 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_volta_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_volta,
|
||||
.mmu_engine_id_to_type = uvm_hal_volta_mmu_engine_id_to_type,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_volta_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
|
||||
@@ -330,7 +347,6 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_turing_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_turing,
|
||||
.mmu_engine_id_to_type = uvm_hal_turing_mmu_engine_id_to_type,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -339,7 +355,6 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_ampere_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_ampere,
|
||||
.mmu_engine_id_to_type = uvm_hal_ampere_mmu_engine_id_to_type,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
|
||||
@@ -356,10 +371,18 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_hopper_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_hopper,
|
||||
.mmu_engine_id_to_type = uvm_hal_hopper_mmu_engine_id_to_type,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_hopper_mmu_client_id_to_utlb_id,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.u.arch_ops = {
|
||||
.init_properties = uvm_hal_blackwell_arch_init_properties,
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_blackwell,
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_blackwell_mmu_client_id_to_utlb_id,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
@@ -374,6 +397,7 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
|
||||
.write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
|
||||
.get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
|
||||
.get_mmu_engine_type = uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported,
|
||||
.parse_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported,
|
||||
.entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
|
||||
.entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
|
||||
@@ -412,6 +436,7 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_get = uvm_hal_volta_fault_buffer_read_get,
|
||||
.write_get = uvm_hal_volta_fault_buffer_write_get,
|
||||
.get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
|
||||
.get_mmu_engine_type = uvm_hal_volta_fault_buffer_get_mmu_engine_type,
|
||||
.parse_replayable_entry = uvm_hal_volta_fault_buffer_parse_replayable_entry,
|
||||
.parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
|
||||
.get_fault_type = uvm_hal_volta_fault_buffer_get_fault_type,
|
||||
@@ -423,12 +448,15 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.u.fault_buffer_ops = {
|
||||
.disable_replayable_faults = uvm_hal_turing_disable_replayable_faults,
|
||||
.clear_replayable_faults = uvm_hal_turing_clear_replayable_faults,
|
||||
.get_mmu_engine_type = uvm_hal_turing_fault_buffer_get_mmu_engine_type,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.u.fault_buffer_ops = {}
|
||||
.u.fault_buffer_ops = {
|
||||
.get_mmu_engine_type = uvm_hal_ampere_fault_buffer_get_mmu_engine_type,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
@@ -440,6 +468,15 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.fault_buffer_ops = {
|
||||
.get_ve_id = uvm_hal_hopper_fault_buffer_get_ve_id,
|
||||
.get_mmu_engine_type = uvm_hal_hopper_fault_buffer_get_mmu_engine_type,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.u.fault_buffer_ops = {
|
||||
.get_fault_type = uvm_hal_blackwell_fault_buffer_get_fault_type,
|
||||
.get_mmu_engine_type = uvm_hal_blackwell_fault_buffer_get_mmu_engine_type,
|
||||
}
|
||||
},
|
||||
};
|
||||
@@ -504,6 +541,11 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t sec2_table[] =
|
||||
@@ -557,6 +599,11 @@ static uvm_hal_class_ops_t sec2_table[] =
|
||||
.decrypt = uvm_hal_hopper_sec2_decrypt,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
|
||||
@@ -632,13 +679,19 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
|
||||
status = ops_init_from_parent(host_table,
|
||||
ARRAY_SIZE(host_table),
|
||||
HOST_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.host_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
|
||||
status = ops_init_from_parent(arch_table,
|
||||
ARRAY_SIZE(arch_table),
|
||||
ARCH_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.arch_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
@@ -778,6 +831,9 @@ void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
|
||||
// TLB invalidate on Blackwell+ GPUs should not use a standalone membar.
|
||||
UVM_ASSERT(gpu->parent->rm_info.gpuArch < NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100);
|
||||
|
||||
for (i = 0; i < gpu->parent->num_hshub_tlb_invalidate_membars; i++)
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
|
||||
@@ -883,7 +939,7 @@ const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_ty
|
||||
|
||||
const char *uvm_fault_type_string(uvm_fault_type_t fault_type)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_FAULT_TYPE_COUNT != 16);
|
||||
BUILD_BUG_ON(UVM_FAULT_TYPE_COUNT != 17);
|
||||
|
||||
switch (fault_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_INVALID_PDE);
|
||||
@@ -902,6 +958,7 @@ const char *uvm_fault_type_string(uvm_fault_type_t fault_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNSUPPORTED_KIND);
|
||||
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_REGION_VIOLATION);
|
||||
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_POISONED);
|
||||
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_CC_VIOLATION);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -932,14 +989,16 @@ const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
|
||||
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
|
||||
{
|
||||
UVM_DBG_PRINT("fault_address: 0x%llx\n", entry->fault_address);
|
||||
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n", entry->instance_ptr.address,
|
||||
uvm_aperture_string(entry->instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n",
|
||||
entry->instance_ptr.address,
|
||||
uvm_aperture_string(entry->instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" fault_type: %s\n", uvm_fault_type_string(entry->fault_type));
|
||||
UVM_DBG_PRINT(" fault_access_type: %s\n", uvm_fault_access_type_string(entry->fault_access_type));
|
||||
UVM_DBG_PRINT(" is_replayable: %s\n", entry->is_replayable? "true": "false");
|
||||
UVM_DBG_PRINT(" is_virtual: %s\n", entry->is_virtual? "true": "false");
|
||||
UVM_DBG_PRINT(" in_protected_mode: %s\n", entry->in_protected_mode? "true": "false");
|
||||
UVM_DBG_PRINT(" fault_source.client_type: %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
|
||||
UVM_DBG_PRINT(" fault_source.client_type: %s\n",
|
||||
uvm_fault_client_type_string(entry->fault_source.client_type));
|
||||
UVM_DBG_PRINT(" fault_source.client_id: %d\n", entry->fault_source.client_id);
|
||||
UVM_DBG_PRINT(" fault_source.gpc_id: %d\n", entry->fault_source.gpc_id);
|
||||
UVM_DBG_PRINT(" fault_source.mmu_engine_id: %d\n", entry->fault_source.mmu_engine_id);
|
||||
@@ -962,13 +1021,15 @@ const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_coun
|
||||
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
|
||||
{
|
||||
if (!entry->address.is_virtual) {
|
||||
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
|
||||
entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
|
||||
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
|
||||
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
|
||||
entry->virtual_info.instance_ptr.address,
|
||||
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
|
||||
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
|
||||
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -112,6 +112,10 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
@@ -120,6 +124,10 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_blackwell_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
|
||||
// Issue a TLB invalidate applying to the specified VA range in a PDB.
|
||||
//
|
||||
@@ -149,43 +157,57 @@ typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
|
||||
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
@@ -196,12 +218,18 @@ void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
|
||||
// By default all semaphore release operations include a membar sys before the
|
||||
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
|
||||
@@ -443,38 +471,31 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Retrieve the page-tree HAL for a given big page size
|
||||
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
|
||||
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU64 big_page_size);
|
||||
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
|
||||
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
|
||||
// faults serviced by UVM are handled. On Pascal the only such engine is
|
||||
// GRAPHICS, so no translation is provided.
|
||||
typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
|
||||
|
||||
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
|
||||
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
|
||||
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id);
|
||||
|
||||
// Replayable faults
|
||||
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -484,6 +505,9 @@ typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
typedef uvm_mmu_engine_type_t (*uvm_hal_fault_buffer_get_mmu_engine_type_t)(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
|
||||
// Parse the replayable entry at the given buffer index. This also clears the
|
||||
// valid bit of the entry in the buffer.
|
||||
@@ -521,6 +545,9 @@ NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent
|
||||
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
uvm_mmu_engine_type_t uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);
|
||||
|
||||
void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -536,12 +563,31 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
uvm_mmu_engine_type_t uvm_hal_volta_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
|
||||
uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);
|
||||
|
||||
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
uvm_mmu_engine_type_t uvm_hal_turing_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_ampere_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
|
||||
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_blackwell_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id);
|
||||
uvm_fault_type_t uvm_hal_blackwell_fault_buffer_get_fault_type(const NvU32 *fault_entry);
|
||||
|
||||
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
@@ -765,7 +811,6 @@ struct uvm_arch_hal_struct
|
||||
uvm_hal_lookup_mode_hal_t mmu_mode_hal;
|
||||
uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
|
||||
uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
|
||||
uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
|
||||
uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
|
||||
};
|
||||
|
||||
@@ -778,6 +823,7 @@ struct uvm_fault_buffer_hal_struct
|
||||
uvm_hal_fault_buffer_read_get_t read_get;
|
||||
uvm_hal_fault_buffer_write_get_t write_get;
|
||||
uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
|
||||
uvm_hal_fault_buffer_get_mmu_engine_type_t get_mmu_engine_type;
|
||||
uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
|
||||
uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
|
||||
uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -300,6 +300,7 @@ typedef enum
|
||||
UVM_FAULT_TYPE_UNSUPPORTED_KIND,
|
||||
UVM_FAULT_TYPE_REGION_VIOLATION,
|
||||
UVM_FAULT_TYPE_POISONED,
|
||||
UVM_FAULT_TYPE_CC_VIOLATION,
|
||||
|
||||
UVM_FAULT_TYPE_COUNT
|
||||
} uvm_fault_type_t;
|
||||
@@ -399,6 +400,7 @@ struct uvm_fault_buffer_entry_struct
|
||||
//
|
||||
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// This is set to true when some fault could not be serviced and a
|
||||
// cancel command needs to be issued
|
||||
@@ -490,9 +492,9 @@ struct uvm_access_counter_buffer_entry_struct
|
||||
// Address of the region for which a notification was sent
|
||||
uvm_gpu_address_t address;
|
||||
|
||||
// These fields are only valid if address.is_virtual is true
|
||||
union
|
||||
{
|
||||
// These fields are only valid if address.is_virtual is true
|
||||
struct
|
||||
{
|
||||
// Instance pointer of one of the channels in the TSG that triggered
|
||||
@@ -522,9 +524,14 @@ struct uvm_access_counter_buffer_entry_struct
|
||||
// a regular processor id because P2P is not allowed between
|
||||
// partitioned GPUs.
|
||||
uvm_processor_id_t resident_id;
|
||||
|
||||
} physical_info;
|
||||
};
|
||||
|
||||
// This is the GPU that triggered the notification. Note that physical
|
||||
// address based notifications are only supported on non-MIG-capable GPUs.
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// Number of times the tracked region was accessed since the last time it
|
||||
// was cleared. Counter values saturate at the maximum value supported by
|
||||
// the GPU (2^16 - 1 in Volta)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -92,7 +92,7 @@ typedef struct
|
||||
{
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_block_retry_t *va_block_retry;
|
||||
uvm_va_block_context_t *va_block_context;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_va_block_region_t region;
|
||||
uvm_processor_id_t dest_id;
|
||||
uvm_make_resident_cause_t cause;
|
||||
@@ -284,8 +284,10 @@ static void hmm_va_block_unregister_gpu(uvm_va_block_t *va_block,
|
||||
|
||||
// Reset preferred location and accessed-by of policy nodes if needed.
|
||||
uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
|
||||
if (uvm_id_equal(node->policy.preferred_location, gpu->id))
|
||||
if (uvm_va_policy_preferred_location_equal(&node->policy, gpu->id, NUMA_NO_NODE)) {
|
||||
node->policy.preferred_location = UVM_ID_INVALID;
|
||||
node->policy.preferred_nid = NUMA_NO_NODE;
|
||||
}
|
||||
|
||||
uvm_processor_mask_clear(&node->policy.accessed_by, gpu->id);
|
||||
}
|
||||
@@ -713,7 +715,7 @@ void uvm_hmm_migrate_finish(uvm_va_block_t *va_block)
|
||||
// Migrate the given range [start end] within a va_block to dest_id.
|
||||
static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
@@ -737,7 +739,7 @@ static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
|
||||
va_block_retry,
|
||||
uvm_va_block_migrate_locked(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
service_context,
|
||||
region,
|
||||
dest_id,
|
||||
mode,
|
||||
@@ -916,14 +918,14 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
|
||||
NvU64 end,
|
||||
uvm_va_block_t **out_va_block)
|
||||
{
|
||||
uvm_va_block_context_t *va_block_context;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_va_space_t *va_space;
|
||||
struct mm_struct *mm;
|
||||
struct vm_area_struct *vma;
|
||||
uvm_va_block_region_t region;
|
||||
NvU64 addr, from, to;
|
||||
uvm_va_block_t *new;
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (va_block->start < start) {
|
||||
status = hmm_split_block(va_block, start - 1, &new);
|
||||
@@ -942,15 +944,18 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
|
||||
// Keep the right part, the left part will be deleted.
|
||||
}
|
||||
|
||||
*out_va_block = va_block;
|
||||
|
||||
// Migrate any GPU data to sysmem before destroying the HMM va_block.
|
||||
// We do this because the new va_range might be for a UVM external
|
||||
// allocation which could be converting an address range that was first
|
||||
// operated on by UVM-HMM and the external allocation should see that data.
|
||||
va_space = va_block->hmm.va_space;
|
||||
mm = va_space->va_space_mm.mm;
|
||||
va_block_context = uvm_va_space_block_context(va_space, mm);
|
||||
|
||||
service_context = uvm_service_block_context_alloc(mm);
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
*out_va_block = va_block;
|
||||
|
||||
for (addr = va_block->start; addr < va_block->end; addr = to + 1) {
|
||||
vma = find_vma_intersection(mm, addr, va_block->end);
|
||||
@@ -964,21 +969,23 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
|
||||
if (!uvm_hmm_vma_is_valid(vma, from, false))
|
||||
continue;
|
||||
|
||||
va_block_context->hmm.vma = vma;
|
||||
service_context->block_context->hmm.vma = vma;
|
||||
|
||||
status = hmm_migrate_range(va_block,
|
||||
NULL,
|
||||
va_block_context,
|
||||
service_context,
|
||||
UVM_ID_CPU,
|
||||
from,
|
||||
to,
|
||||
UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
|
||||
NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
break;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
uvm_service_block_context_free(service_context);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Normally, the HMM va_block is destroyed when the va_space is destroyed
|
||||
@@ -1089,12 +1096,17 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
NvU64 end,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_processor_mask_t set_accessed_by_processors;
|
||||
uvm_processor_mask_t *set_accessed_by_processors;
|
||||
const uvm_va_policy_t *old_policy;
|
||||
uvm_va_policy_node_t *node;
|
||||
uvm_va_block_region_t region;
|
||||
uvm_processor_id_t id;
|
||||
NV_STATUS status, tracker_status;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS tracker_status;
|
||||
|
||||
set_accessed_by_processors = uvm_processor_mask_cache_alloc();
|
||||
if (!set_accessed_by_processors)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// Note that we can't just call uvm_va_policy_set_range() for the whole
|
||||
// range [addr end] because we need to examine the old value of
|
||||
@@ -1107,25 +1119,27 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
// If the old preferred location is a valid processor ID, remote
|
||||
// mappings should be established to the new preferred location if
|
||||
// accessed-by is set.
|
||||
uvm_processor_mask_zero(&set_accessed_by_processors);
|
||||
uvm_processor_mask_zero(set_accessed_by_processors);
|
||||
|
||||
if (UVM_ID_IS_VALID(old_policy->preferred_location) &&
|
||||
uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
|
||||
uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
|
||||
uvm_processor_mask_set(set_accessed_by_processors, old_policy->preferred_location);
|
||||
|
||||
if (!uvm_va_policy_set_preferred_location(va_block,
|
||||
region,
|
||||
preferred_location,
|
||||
preferred_cpu_nid,
|
||||
old_policy))
|
||||
return NV_ERR_NO_MEMORY;
|
||||
old_policy)) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
// Establish new remote mappings if the old preferred location had
|
||||
// accessed-by set.
|
||||
for_each_id_in_mask(id, &set_accessed_by_processors) {
|
||||
for_each_id_in_mask(id, set_accessed_by_processors) {
|
||||
status = uvm_va_block_set_accessed_by_locked(va_block, va_block_context, id, region, out_tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
break;
|
||||
}
|
||||
|
||||
// Even though the UVM_VA_BLOCK_RETRY_LOCKED() may unlock and relock
|
||||
@@ -1143,10 +1157,11 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
status = tracker_status;
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
break;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
uvm_processor_mask_cache_free(set_accessed_by_processors);
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
@@ -1586,7 +1601,7 @@ static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE);
|
||||
|
||||
uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
}
|
||||
|
||||
@@ -1691,8 +1706,6 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Bug 3898467: unmap indirect peers when freeing GPU chunks
|
||||
|
||||
uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
|
||||
gpu_state->chunks[page_index] = NULL;
|
||||
}
|
||||
@@ -1741,8 +1754,6 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 3898467: map indirect peers.
|
||||
|
||||
uvm_processor_mask_set(&va_block->resident, id);
|
||||
uvm_page_mask_set(&gpu_state->resident, page_index);
|
||||
|
||||
@@ -2128,6 +2139,7 @@ static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block,
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_context_t *devmem_fault_context)
|
||||
{
|
||||
uvm_processor_id_t processor_id;
|
||||
@@ -2262,7 +2274,7 @@ static NV_STATUS populate_region(uvm_va_block_t *va_block,
|
||||
// uvm_hmm_invalidate() should handle that if the underlying page
|
||||
// is invalidated.
|
||||
// Also note there can be an allocated page due to GPU-to-GPU
|
||||
// migration between non-peer or indirect peer GPUs.
|
||||
// migration between non-peer GPUs.
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -2400,6 +2412,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
|
||||
{
|
||||
uvm_va_block_region_t region = service_context->region;
|
||||
struct page **pages = service_context->block_context->hmm.pages;
|
||||
struct vm_area_struct *vma = service_context->block_context->hmm.vma;
|
||||
int npages;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_make_resident_cause_t cause;
|
||||
@@ -2417,12 +2430,9 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
|
||||
else
|
||||
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
|
||||
|
||||
status = uvm_hmm_va_block_migrate_locked(va_block,
|
||||
va_block_retry,
|
||||
service_context->block_context,
|
||||
UVM_ID_CPU,
|
||||
region,
|
||||
cause);
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, vma, region));
|
||||
|
||||
status = uvm_hmm_va_block_migrate_locked(va_block, va_block_retry, service_context, UVM_ID_CPU, region, cause);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
@@ -2439,7 +2449,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
|
||||
// mmap() files so we check for that here and report a fatal fault.
|
||||
// Otherwise with the current Linux 6.1 make_device_exclusive_range(),
|
||||
// it doesn't make the page exclusive and we end up in an endless loop.
|
||||
if (service_context->block_context->hmm.vma->vm_flags & (VM_SHARED | VM_HUGETLB)) {
|
||||
if (vma->vm_flags & (VM_SHARED | VM_HUGETLB)) {
|
||||
status = NV_ERR_NOT_SUPPORTED;
|
||||
goto done;
|
||||
}
|
||||
@@ -2662,6 +2672,8 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
UVM_ASSERT(service_context);
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
|
||||
struct page *src_page;
|
||||
|
||||
@@ -2966,7 +2978,7 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
|
||||
{
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_block_retry_t *va_block_retry;
|
||||
uvm_va_block_context_t *va_block_context;
|
||||
uvm_service_block_context_t *service_context;
|
||||
const unsigned long *src_pfns;
|
||||
unsigned long *dst_pfns;
|
||||
uvm_va_block_region_t region;
|
||||
@@ -2976,9 +2988,9 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
|
||||
|
||||
va_block = uvm_hmm_migrate_event->va_block;
|
||||
va_block_retry = uvm_hmm_migrate_event->va_block_retry;
|
||||
va_block_context = uvm_hmm_migrate_event->va_block_context;
|
||||
src_pfns = va_block_context->hmm.src_pfns;
|
||||
dst_pfns = va_block_context->hmm.dst_pfns;
|
||||
service_context = uvm_hmm_migrate_event->service_context;
|
||||
src_pfns = service_context->block_context->hmm.src_pfns;
|
||||
dst_pfns = service_context->block_context->hmm.dst_pfns;
|
||||
region = uvm_hmm_migrate_event->region;
|
||||
dest_id = uvm_hmm_migrate_event->dest_id;
|
||||
page_mask = &uvm_hmm_migrate_event->page_mask;
|
||||
@@ -2994,7 +3006,7 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
|
||||
region,
|
||||
page_mask,
|
||||
&uvm_hmm_migrate_event->same_devmem_page_mask,
|
||||
va_block_context);
|
||||
service_context->block_context);
|
||||
}
|
||||
else {
|
||||
status = dmamap_src_sysmem_pages(va_block,
|
||||
@@ -3004,14 +3016,15 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
|
||||
region,
|
||||
page_mask,
|
||||
dest_id,
|
||||
NULL);
|
||||
service_context);
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_va_block_make_resident_copy(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
service_context->block_context,
|
||||
dest_id,
|
||||
region,
|
||||
page_mask,
|
||||
@@ -3050,7 +3063,7 @@ static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migra
|
||||
|
||||
va_block = uvm_hmm_migrate_event->va_block;
|
||||
va_block_retry = uvm_hmm_migrate_event->va_block_retry;
|
||||
va_block_context = uvm_hmm_migrate_event->va_block_context;
|
||||
va_block_context = uvm_hmm_migrate_event->service_context->block_context;
|
||||
region = uvm_hmm_migrate_event->region;
|
||||
dest_id = uvm_hmm_migrate_event->dest_id;
|
||||
page_mask = &uvm_hmm_migrate_event->page_mask;
|
||||
@@ -3090,12 +3103,13 @@ static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migra
|
||||
// TODO: Bug 3900785: investigate ways to implement async migration.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause)
|
||||
{
|
||||
uvm_hmm_migrate_event_t uvm_hmm_migrate_event;
|
||||
uvm_va_block_context_t *va_block_context = service_context->block_context;
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
@@ -3106,6 +3120,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, vma, region));
|
||||
uvm_assert_rwsem_locked(&va_block->hmm.va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->hmm.migrate_lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
@@ -3116,7 +3131,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_hmm_migrate_event.va_block = va_block;
|
||||
uvm_hmm_migrate_event.va_block_retry = va_block_retry;
|
||||
uvm_hmm_migrate_event.va_block_context = va_block_context;
|
||||
uvm_hmm_migrate_event.service_context = service_context;
|
||||
uvm_hmm_migrate_event.region = region;
|
||||
uvm_hmm_migrate_event.dest_id = dest_id;
|
||||
uvm_hmm_migrate_event.cause = cause;
|
||||
@@ -3202,7 +3217,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
@@ -3214,11 +3229,12 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
NvU64 addr, end, last_address;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_context_t *block_context = service_context->block_context;
|
||||
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
mm = va_block_context->mm;
|
||||
mm = block_context->mm;
|
||||
UVM_ASSERT(mm == va_space->va_space_mm.mm);
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
@@ -3228,7 +3244,7 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
for (addr = base; addr < last_address; addr = end + 1) {
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
status = hmm_va_block_find_create(va_space, addr, false, &va_block_context->hmm.vma, &va_block);
|
||||
status = hmm_va_block_find_create(va_space, addr, false, &block_context->hmm.vma, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@@ -3236,18 +3252,11 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
if (end > last_address)
|
||||
end = last_address;
|
||||
|
||||
vma = va_block_context->hmm.vma;
|
||||
vma = block_context->hmm.vma;
|
||||
if (end > vma->vm_end - 1)
|
||||
end = vma->vm_end - 1;
|
||||
|
||||
status = hmm_migrate_range(va_block,
|
||||
&va_block_retry,
|
||||
va_block_context,
|
||||
dest_id,
|
||||
addr,
|
||||
end,
|
||||
mode,
|
||||
out_tracker);
|
||||
status = hmm_migrate_range(va_block, &va_block_retry, service_context, dest_id, addr, end, mode, out_tracker);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
@@ -3283,12 +3292,13 @@ NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
// Note that the caller must initialize va_block_context->hmm.src_pfns by
|
||||
// calling uvm_hmm_va_block_evict_chunk_prep() before calling this.
|
||||
static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause,
|
||||
bool *out_accessed_by_set)
|
||||
{
|
||||
uvm_va_block_context_t *va_block_context = service_context->block_context;
|
||||
NvU64 start = uvm_va_block_region_start(va_block, region);
|
||||
NvU64 end = uvm_va_block_region_end(va_block, region);
|
||||
unsigned long *src_pfns = va_block_context->hmm.src_pfns;
|
||||
@@ -3296,7 +3306,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_hmm_migrate_event_t uvm_hmm_migrate_event = {
|
||||
.va_block = va_block,
|
||||
.va_block_retry = NULL,
|
||||
.va_block_context = va_block_context,
|
||||
.service_context = service_context,
|
||||
.region = region,
|
||||
.dest_id = UVM_ID_CPU,
|
||||
.cause = cause,
|
||||
@@ -3329,13 +3339,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
// TODO: Bug 3660922: Need to handle read duplication at some point.
|
||||
UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region));
|
||||
|
||||
status = migrate_alloc_on_cpu(va_block,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
region,
|
||||
page_mask,
|
||||
NULL,
|
||||
va_block_context);
|
||||
status = migrate_alloc_on_cpu(va_block, src_pfns, dst_pfns, region, page_mask, NULL, va_block_context);
|
||||
if (status != NV_OK)
|
||||
goto err;
|
||||
|
||||
@@ -3369,13 +3373,13 @@ err:
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set)
|
||||
{
|
||||
return hmm_va_block_evict_chunks(va_block,
|
||||
va_block_context,
|
||||
service_context,
|
||||
pages_to_evict,
|
||||
region,
|
||||
UVM_MAKE_RESIDENT_CAUSE_EVICTION,
|
||||
@@ -3384,11 +3388,12 @@ NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
|
||||
NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
unsigned long *src_pfns = va_block_context->hmm.src_pfns;
|
||||
uvm_va_block_context_t *block_context = service_context->block_context;
|
||||
unsigned long *src_pfns = block_context->hmm.src_pfns;
|
||||
uvm_va_block_gpu_state_t *gpu_state;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_gpu_chunk_t *gpu_chunk;
|
||||
@@ -3401,7 +3406,7 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(gpu_state->chunks);
|
||||
|
||||
// Fill in the src_pfns[] with the ZONE_DEVICE private PFNs of the GPU.
|
||||
memset(src_pfns, 0, sizeof(va_block_context->hmm.src_pfns));
|
||||
memset(src_pfns, 0, sizeof(block_context->hmm.src_pfns));
|
||||
|
||||
// TODO: Bug 3368756: add support for large GPU pages.
|
||||
for_each_va_block_page_in_region_mask(page_index, pages_to_evict, region) {
|
||||
@@ -3409,7 +3414,7 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
gpu,
|
||||
uvm_va_block_cpu_page_address(va_block, page_index));
|
||||
status = uvm_hmm_va_block_evict_chunk_prep(va_block,
|
||||
va_block_context,
|
||||
block_context,
|
||||
gpu_chunk,
|
||||
uvm_va_block_region_for_page(page_index));
|
||||
if (status != NV_OK)
|
||||
@@ -3417,7 +3422,7 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
return hmm_va_block_evict_chunks(va_block,
|
||||
va_block_context,
|
||||
service_context,
|
||||
pages_to_evict,
|
||||
region,
|
||||
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE,
|
||||
|
||||
@@ -287,16 +287,17 @@ typedef struct
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// This is called to migrate a region within a HMM va_block.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma
|
||||
// must be valid.
|
||||
// This is called to migrate a region within a HMM va_block. service_context
|
||||
// must not be NULL, service_context->block_context must not be NULL and
|
||||
// service_context->block_context->hmm.vma must be valid.
|
||||
//
|
||||
// Special return values (besides things like NV_ERR_NO_MEMORY):
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
|
||||
// not be migrated and that a retry might succeed after unlocking the
|
||||
// va_block lock, va_space lock, and mmap lock.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause);
|
||||
@@ -304,13 +305,14 @@ typedef struct
|
||||
// This is called to migrate an address range of HMM allocations via
|
||||
// UvmMigrate().
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->hmm.vma.
|
||||
// service_context and service_context->block_context must not be NULL.
|
||||
// The caller is not required to set
|
||||
// service_context->block_context->hmm.vma.
|
||||
//
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
||||
// the va_space read lock must be held.
|
||||
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
@@ -329,27 +331,31 @@ typedef struct
|
||||
uvm_gpu_chunk_t *gpu_chunk,
|
||||
uvm_va_block_region_t chunk_region);
|
||||
|
||||
// Migrate pages to system memory for the given page mask.
|
||||
// Note that the mmap lock is not held and there is no MM retained.
|
||||
// This must be called after uvm_hmm_va_block_evict_chunk_prep() has
|
||||
// initialized va_block_context->hmm.src_pfns[] for the source GPU physical
|
||||
// PFNs being migrated. Note that the input mask 'pages_to_evict' can be
|
||||
// modified. If any of the evicted pages has the accessed by policy set,
|
||||
// then record that by setting out_accessed_by_set.
|
||||
// Migrate pages to system memory for the given page mask. Note that the
|
||||
// mmap lock is not held and there is no MM retained. This must be called
|
||||
// after uvm_hmm_va_block_evict_chunk_prep() has initialized
|
||||
// service_context->block_context->hmm.src_pfns[] for the source GPU
|
||||
// physical PFNs being migrated. Note that the input mask 'pages_to_evict'
|
||||
// can be modified. If any of the evicted pages has the accessed by policy
|
||||
// set, then record that by setting out_accessed_by_set.
|
||||
// The caller is not required to set
|
||||
// service_context->block_context->hmm.vma; it will be cleared in
|
||||
// uvm_hmm_va_block_evict_chunks().
|
||||
// Locking: the va_block lock must be locked.
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set);
|
||||
|
||||
// Migrate pages from the given GPU to system memory for the given page
|
||||
// mask and region. va_block_context must not be NULL.
|
||||
// Note that the mmap lock is not held and there is no MM retained.
|
||||
// Migrate pages from the given GPU to system memory for the given page mask
|
||||
// and region. service_context and
|
||||
// service_context->block_context must not be NULL. Note that
|
||||
// the mmap lock is not held and there is no MM retained.
|
||||
// Locking: the va_block lock must be locked.
|
||||
NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
@@ -572,7 +578,7 @@ typedef struct
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause)
|
||||
@@ -581,7 +587,7 @@ typedef struct
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
@@ -606,7 +612,7 @@ typedef struct
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set)
|
||||
@@ -616,7 +622,7 @@ typedef struct
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
|
||||
@@ -27,6 +27,24 @@
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_hopper_fault_buffer.h"
|
||||
|
||||
// Select the peer copy mode for a Hopper parent GPU.
//
// Returns UVM_GPU_PEER_COPY_MODE_UNSUPPORTED when
// g_uvm_global.conf_computing_enabled is set, UVM_GPU_PEER_COPY_MODE_VIRTUAL
// when uvm_parent_gpu_is_coherent(parent_gpu) is true, and otherwise the
// global g_uvm_global.peer_copy_mode setting. The Confidential Computing check
// is evaluated first, so it takes precedence over the coherent-GPU check.
static uvm_gpu_peer_copy_mode_t hopper_peer_copy_mode(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// In Confidential Computing the Copy Engine supports encrypted copies
|
||||
// between peers. But in Hopper these transfers require significant
|
||||
// software support (ex: unprotected vidmem), so in practice they are not
|
||||
// allowed.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;
|
||||
|
||||
// TODO: Bug 4174553: In some Grace Hopper setups, physical peer copies
|
||||
// result in errors. Force peer copies to use virtual addressing until the
|
||||
// issue is clarified.
|
||||
if (uvm_parent_gpu_is_coherent(parent_gpu))
|
||||
return UVM_GPU_PEER_COPY_MODE_VIRTUAL;
|
||||
|
||||
// No architecture-specific restriction applies: use the global setting.
return g_uvm_global.peer_copy_mode;
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
parent_gpu->tlb_batch.va_invalidate_supported = true;
|
||||
@@ -58,14 +76,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);
|
||||
|
||||
// Physical CE writes to vidmem are non-coherent with respect to the CPU on
|
||||
// GH180.
|
||||
// Grace Hopper.
|
||||
parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);
|
||||
|
||||
// TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
|
||||
// portion for the uvm tests became nonresponsive after
|
||||
// some time and then failed even after reboot
|
||||
parent_gpu->peer_copy_mode = uvm_parent_gpu_is_coherent(parent_gpu) ?
|
||||
UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;
|
||||
parent_gpu->peer_copy_mode = hopper_peer_copy_mode(parent_gpu);
|
||||
|
||||
// All GR context buffers may be mapped to 57b wide VAs. All "compute" units
|
||||
// accessing GR context buffers support the 57-bit VA range.
|
||||
|
||||
@@ -480,7 +480,6 @@ static NvU64 encrypt_iv_address(uvm_push_t *push, uvm_gpu_address_t dst)
|
||||
return iv_address;
|
||||
}
|
||||
|
||||
// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
|
||||
void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
@@ -530,7 +529,6 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
|
||||
encrypt_or_decrypt(push, dst, src, size);
|
||||
}
|
||||
|
||||
// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
|
||||
void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020 NVIDIA Corporation
|
||||
Copyright (c) 2020-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -21,6 +21,7 @@
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "hwref/hopper/gh100/dev_fault.h"
|
||||
|
||||
@@ -40,3 +41,49 @@ NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_t
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if client_id is one of the CE fault client IDs recognized here:
// anything in [NV_PFAULT_CLIENT_HUB_HSCE0, NV_PFAULT_CLIENT_HUB_HSCE9], anything
// in [NV_PFAULT_CLIENT_HUB_HSCE10, NV_PFAULT_CLIENT_HUB_HSCE15], or one of
// NV_PFAULT_CLIENT_HUB_CE0..CE3. Returns false for every other ID.
static bool client_id_ce(NvU16 client_id)
|
||||
{
|
||||
// NOTE(review): the HSCE IDs are tested as two separate ranges, presumably
// because HSCE0-9 and HSCE10-15 are not contiguous in the client ID space
// -- confirm against dev_fault.h.
if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE0 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE9)
|
||||
return true;
|
||||
|
||||
if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE10 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE15)
|
||||
return true;
|
||||
|
||||
// The remaining CE client IDs are matched individually.
switch (client_id) {
|
||||
case NV_PFAULT_CLIENT_HUB_CE0:
|
||||
case NV_PFAULT_CLIENT_HUB_CE1:
|
||||
case NV_PFAULT_CLIENT_HUB_CE2:
|
||||
case NV_PFAULT_CLIENT_HUB_CE3:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Classify a fault by its client type and client ID.
//
// For HUB clients, returns UVM_MMU_ENGINE_TYPE_CE when client_id_ce(client_id)
// is true and UVM_MMU_ENGINE_TYPE_HOST when client_id is
// NV_PFAULT_CLIENT_HUB_HOST or NV_PFAULT_CLIENT_HUB_ESC. Every other case
// returns UVM_MMU_ENGINE_TYPE_GRAPHICS after asserting that the fault looks
// like a GPC (graphics) fault.
//
// mmu_engine_id does not influence the returned value; it is only used in the
// assertions that cross-check it against the classification derived from the
// client ID.
uvm_mmu_engine_type_t uvm_hal_hopper_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id)
|
||||
{
|
||||
// Servicing CE and Host (HUB clients) faults.
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
|
||||
if (client_id_ce(client_id)) {
|
||||
UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_CE;
|
||||
}
|
||||
|
||||
if (client_id == NV_PFAULT_CLIENT_HUB_HOST || client_id == NV_PFAULT_CLIENT_HUB_ESC) {
|
||||
UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_HOST;
|
||||
}
|
||||
}
|
||||
|
||||
// We shouldn't be servicing faults from any engines other than GR. Note
// that a HUB client that is neither CE nor HOST/ESC also reaches this
// point, where the client_type assertion below does not hold.
|
||||
UVM_ASSERT_MSG(client_id <= NV_PFAULT_CLIENT_GPC_ROP_3, "Unexpected client ID: 0x%x\n", client_id);
|
||||
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
|
||||
UVM_ASSERT(client_type == UVM_FAULT_CLIENT_TYPE_GPC);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -157,6 +157,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
@@ -183,7 +184,12 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
@@ -196,7 +202,9 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
@@ -204,7 +212,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
@@ -212,6 +220,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
@@ -221,9 +230,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
@@ -277,8 +286,13 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
@@ -292,7 +306,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
@@ -300,12 +316,12 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
uvm_membar_t membar;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
@@ -332,6 +348,11 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
@@ -343,7 +364,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
@@ -358,7 +379,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
@@ -372,14 +393,9 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
membar = UVM_MEMBAR_SYS;
|
||||
else if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
membar = UVM_MEMBAR_GPU;
|
||||
else
|
||||
membar = UVM_MEMBAR_NONE;
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
Copyright (c) 2020-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -47,21 +47,7 @@
|
||||
#define ATS_ALLOWED 0
|
||||
#define ATS_NOT_ALLOWED 1
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
|
||||
{
|
||||
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
|
||||
return UVM_MMU_ENGINE_TYPE_HOST;
|
||||
|
||||
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
|
||||
return UVM_MMU_ENGINE_TYPE_CE;
|
||||
|
||||
// We shouldn't be servicing faults from any other engines
|
||||
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
|
||||
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_hopper(NvU32 page_size)
|
||||
static NvU32 page_table_depth_hopper(NvU64 page_size)
|
||||
{
|
||||
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
|
||||
if (page_size == UVM_PAGE_SIZE_2M)
|
||||
@@ -79,7 +65,7 @@ static NvU32 entries_per_index_hopper(NvU32 depth)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
|
||||
static NvLength entry_offset_hopper(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 6);
|
||||
if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
|
||||
@@ -92,7 +78,7 @@ static NvLength entry_size_hopper(NvU32 depth)
|
||||
return entries_per_index_hopper(depth) * 8;
|
||||
}
|
||||
|
||||
static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_hopper(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};
|
||||
|
||||
@@ -120,7 +106,7 @@ static NvU32 num_va_bits_hopper(void)
|
||||
return 57;
|
||||
}
|
||||
|
||||
static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 6);
|
||||
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
|
||||
@@ -233,7 +219,7 @@ static NvU64 make_sparse_pte_hopper(void)
|
||||
HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
|
||||
}
|
||||
|
||||
static NvU64 unmapped_pte_hopper(NvU32 page_size)
|
||||
static NvU64 unmapped_pte_hopper(NvU64 page_size)
|
||||
{
|
||||
// Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
|
||||
// the corresponding 4k PTEs to be ignored. This allows the invalidation of
|
||||
@@ -490,7 +476,7 @@ static void make_pde_hopper(void *entry,
|
||||
|
||||
static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size)
|
||||
{
|
||||
static bool initialized = false;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVidia Corporation
|
||||
Copyright (c) 2013-2024 NVidia Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -494,7 +494,7 @@ typedef struct
|
||||
NvU64 base NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 offset NV_ALIGN_BYTES(8); // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
|
||||
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
|
||||
NvS32 rmCtrlFd; // IN
|
||||
NvU32 hClient; // IN
|
||||
@@ -837,12 +837,6 @@ typedef struct
|
||||
// Initialize any tracker object such as a queue or counter
|
||||
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
|
||||
// UvmToolsCreateProcessorCounters.
|
||||
// Note that the order of structure elements has the version as the last field.
|
||||
// This is used to tell whether the kernel supports V2 events or not because
|
||||
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
|
||||
// field but V2 will. This is needed because it is possible to create an event
|
||||
// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
|
||||
// been called yet and the kernel version is unknown.
|
||||
//
|
||||
#define UVM_TOOLS_INIT_EVENT_TRACKER UVM_IOCTL_BASE(56)
|
||||
typedef struct
|
||||
@@ -853,9 +847,8 @@ typedef struct
|
||||
NvProcessorUuid processor; // IN
|
||||
NvU32 allProcessors; // IN
|
||||
NvU32 uvmFd; // IN
|
||||
NvU32 version; // IN (UvmToolsEventQueueVersion)
|
||||
NV_STATUS rmStatus; // OUT
|
||||
NvU32 requestedVersion; // IN
|
||||
NvU32 grantedVersion; // OUT
|
||||
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
|
||||
|
||||
//
|
||||
@@ -936,23 +929,15 @@ typedef struct
|
||||
|
||||
//
|
||||
// UvmToolsGetProcessorUuidTable
|
||||
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
|
||||
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
|
||||
// processor IDs (physical GPU UUIDs) will be reported.
|
||||
// tablePtr == 0 and count == 0 can be used to query how many processors are
|
||||
// present in order to dynamically allocate the correct size array since the
|
||||
// total number of processors is returned in 'count'.
|
||||
//
|
||||
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE UVM_IOCTL_BASE(64)
|
||||
typedef struct
|
||||
{
|
||||
NvU64 tablePtr NV_ALIGN_BYTES(8); // IN
|
||||
NvU32 count; // IN/OUT
|
||||
NvU32 version; // IN (UvmToolsEventQueueVersion)
|
||||
NV_STATUS rmStatus; // OUT
|
||||
NvU32 version; // OUT
|
||||
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
|
||||
|
||||
|
||||
//
|
||||
// UvmMapDynamicParallelismRegion
|
||||
//
|
||||
@@ -995,7 +980,7 @@ typedef struct
|
||||
{
|
||||
NvU64 base NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
|
||||
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "uvm_pte_batch.h"
|
||||
#include "uvm_tlb_batch.h"
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "nv_uvm_types.h"
|
||||
|
||||
#include "uvm_pushbuffer.h"
|
||||
|
||||
@@ -60,7 +61,7 @@ typedef struct
|
||||
size_t buffer_size;
|
||||
|
||||
// Page size in bytes
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
|
||||
// Size of a single PTE in bytes
|
||||
NvU32 pte_size;
|
||||
@@ -90,7 +91,7 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
|
||||
uvm_gpu_t *gpu,
|
||||
const uvm_map_rm_params_t *map_rm_params,
|
||||
NvU64 length,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pte_buffer_t *pte_buffer)
|
||||
{
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_range->va_space, gpu);
|
||||
@@ -101,11 +102,11 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
|
||||
|
||||
pte_buffer->va_range = va_range;
|
||||
pte_buffer->gpu = gpu;
|
||||
pte_buffer->mapping_info.cachingType = map_rm_params->caching_type;
|
||||
pte_buffer->mapping_info.mappingType = map_rm_params->mapping_type;
|
||||
pte_buffer->mapping_info.formatType = map_rm_params->format_type;
|
||||
pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
|
||||
pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
|
||||
pte_buffer->mapping_info.cachingType = (UvmRmGpuCachingType) map_rm_params->caching_type;
|
||||
pte_buffer->mapping_info.mappingType = (UvmRmGpuMappingType) map_rm_params->mapping_type;
|
||||
pte_buffer->mapping_info.formatType = (UvmRmGpuFormatType) map_rm_params->format_type;
|
||||
pte_buffer->mapping_info.elementBits = (UvmRmGpuFormatElementBits) map_rm_params->element_bits;
|
||||
pte_buffer->mapping_info.compressionType = (UvmRmGpuCompressionType) map_rm_params->compression_type;
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
|
||||
pte_buffer->mapping_info.mappingPageSize = page_size;
|
||||
|
||||
@@ -649,9 +650,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
return NV_OK;
|
||||
}
|
||||
// This is a local or peer allocation, so the owning GPU must have been
|
||||
// registered.
|
||||
// This also checks for if EGM owning GPU is registered.
|
||||
|
||||
// registered. This also checks for if EGM owning GPU is registered.
|
||||
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
|
||||
if (!owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
@@ -664,7 +663,6 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
// semantics of sysmem allocations.
|
||||
|
||||
// Check if peer access for peer memory is enabled.
|
||||
// This path also handles EGM allocations.
|
||||
if (owning_gpu != mapping_gpu && (!mem_info->sysmem || mem_info->egm)) {
|
||||
// TODO: Bug 1757136: In SLI, the returned UUID may be different but a
|
||||
// local mapping must be used. We need to query SLI groups to know
|
||||
@@ -855,9 +853,10 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
UvmGpuMemoryInfo mem_info;
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
|
||||
NvU32 mapping_page_size;
|
||||
NvU64 mapping_page_size;
|
||||
NvU64 biggest_mapping_page_size;
|
||||
NvU64 alignments;
|
||||
NvU32 smallest_alignment;
|
||||
NvU64 smallest_alignment;
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
@@ -946,9 +945,11 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
|
||||
// Check for the maximum page size for the mapping of vidmem allocations,
|
||||
// the vMMU segment size may limit the range of page sizes.
|
||||
biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
|
||||
mapping_gpu->mem_info.max_vidmem_page_size);
|
||||
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
|
||||
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
|
||||
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
|
||||
(mapping_page_size > biggest_mapping_page_size))
|
||||
mapping_page_size = biggest_mapping_page_size;
|
||||
|
||||
mem_info.pageSize = mapping_page_size;
|
||||
|
||||
@@ -970,7 +971,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
{
|
||||
uvm_va_range_t *va_range = NULL;
|
||||
uvm_gpu_t *mapping_gpu;
|
||||
uvm_processor_mask_t mapped_gpus;
|
||||
uvm_processor_mask_t *mapped_gpus;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t i;
|
||||
uvm_map_rm_params_t map_rm_params;
|
||||
@@ -985,9 +986,13 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
if (uvm_api_range_invalid_4k(params->base, params->length))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
|
||||
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
mapped_gpus = uvm_processor_mask_cache_alloc();
|
||||
if (!mapped_gpus)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
va_range = uvm_va_range_find(va_space, params->base);
|
||||
|
||||
@@ -995,10 +1000,11 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
va_range->type != UVM_VA_RANGE_TYPE_EXTERNAL ||
|
||||
va_range->node.end < params->base + params->length - 1) {
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_processor_mask_cache_free(mapped_gpus);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
uvm_processor_mask_zero(&mapped_gpus);
|
||||
uvm_processor_mask_zero(mapped_gpus);
|
||||
for (i = 0; i < params->gpuAttributesCount; i++) {
|
||||
if (uvm_api_mapping_type_invalid(params->perGpuAttributes[i].gpuMappingType) ||
|
||||
uvm_api_caching_type_invalid(params->perGpuAttributes[i].gpuCachingType) ||
|
||||
@@ -1034,7 +1040,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
uvm_processor_mask_set(&mapped_gpus, mapping_gpu->id);
|
||||
uvm_processor_mask_set(mapped_gpus, mapping_gpu->id);
|
||||
}
|
||||
|
||||
// Wait for outstanding page table operations to finish across all GPUs. We
|
||||
@@ -1043,6 +1049,8 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
status = uvm_tracker_wait_deinit(&tracker);
|
||||
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_processor_mask_cache_free(mapped_gpus);
|
||||
|
||||
return status;
|
||||
|
||||
error:
|
||||
@@ -1051,7 +1059,7 @@ error:
|
||||
(void)uvm_tracker_wait_deinit(&tracker);
|
||||
|
||||
// Tear down only those mappings we created during this call
|
||||
for_each_va_space_gpu_in_mask(mapping_gpu, va_space, &mapped_gpus) {
|
||||
for_each_va_space_gpu_in_mask(mapping_gpu, va_space, mapped_gpus) {
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
uvm_ext_gpu_map_t *ext_map, *ext_map_next;
|
||||
|
||||
@@ -1067,6 +1075,7 @@ error:
|
||||
}
|
||||
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_processor_mask_cache_free(mapped_gpus);
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -1356,9 +1365,7 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = NV_OK;
|
||||
// TODO: Bug 4351121: retained_mask should be pre-allocated, not on the
|
||||
// stack.
|
||||
uvm_processor_mask_t retained_mask;
|
||||
uvm_processor_mask_t *retained_mask = NULL;
|
||||
LIST_HEAD(deferred_free_list);
|
||||
|
||||
if (uvm_api_range_invalid_4k(base, length))
|
||||
@@ -1391,17 +1398,25 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
}
|
||||
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL) {
|
||||
retained_mask = va_range->external.retained_mask;
|
||||
|
||||
// Set the retained_mask to NULL to prevent
|
||||
// uvm_va_range_destroy_external() from freeing the mask.
|
||||
va_range->external.retained_mask = NULL;
|
||||
|
||||
UVM_ASSERT(retained_mask);
|
||||
|
||||
// External ranges may have deferred free work, so the GPUs may have to
|
||||
// be retained. Construct the mask of all the GPUs that need to be
|
||||
// retained.
|
||||
uvm_processor_mask_and(&retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
|
||||
uvm_processor_mask_and(retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
|
||||
}
|
||||
|
||||
uvm_va_range_destroy(va_range, &deferred_free_list);
|
||||
|
||||
// If there is deferred work, retain the required GPUs.
|
||||
if (!list_empty(&deferred_free_list))
|
||||
uvm_global_gpu_retain(&retained_mask);
|
||||
uvm_global_gpu_retain(retained_mask);
|
||||
|
||||
out:
|
||||
uvm_va_space_up_write(va_space);
|
||||
@@ -1409,9 +1424,13 @@ out:
|
||||
if (!list_empty(&deferred_free_list)) {
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
uvm_deferred_free_object_list(&deferred_free_list);
|
||||
uvm_global_gpu_release(&retained_mask);
|
||||
uvm_global_gpu_release(retained_mask);
|
||||
}
|
||||
|
||||
// Free the mask allocated in uvm_va_range_create_external() since
|
||||
// uvm_va_range_destroy() won't free this mask.
|
||||
uvm_processor_mask_cache_free(retained_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
Copyright (c) 2021-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -74,6 +74,14 @@ NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm
|
||||
return 0;
|
||||
}
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported(NvU16 mmu_engine_id,
|
||||
uvm_fault_client_type_t client_type,
|
||||
NvU16 client_id)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_get_mmu_engine_type is not supported on Maxwell GPUs.\n");
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
|
||||
uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_get_fault_type is not supported.\n");
|
||||
|
||||
@@ -108,7 +108,7 @@ void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
// No per VA invalidate on Maxwell, redirect to invalidate all.
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_mmu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push_macros.h"
|
||||
#include "hwref/maxwell/gm107/dev_mmu.h"
|
||||
|
||||
@@ -52,7 +53,7 @@ static NvU32 entries_per_index_maxwell(NvU32 depth)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvLength entry_offset_maxwell(NvU32 depth, NvU32 page_size)
|
||||
static NvLength entry_offset_maxwell(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 2);
|
||||
if (page_size == UVM_PAGE_SIZE_4K && depth == 0)
|
||||
@@ -128,7 +129,7 @@ static NvLength entry_size_maxwell(NvU32 depth)
|
||||
return 8;
|
||||
}
|
||||
|
||||
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 2);
|
||||
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
|
||||
@@ -146,7 +147,7 @@ static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
|
||||
}
|
||||
}
|
||||
|
||||
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 2);
|
||||
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
|
||||
@@ -169,32 +170,32 @@ static NvU32 num_va_bits_maxwell(void)
|
||||
return 40;
|
||||
}
|
||||
|
||||
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
return entry_size_maxwell(depth) << index_bits_maxwell_64(depth, page_size);
|
||||
}
|
||||
|
||||
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
return entry_size_maxwell(depth) << index_bits_maxwell_128(depth, page_size);
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_maxwell(NvU32 page_size)
|
||||
static NvU32 page_table_depth_maxwell(NvU64 page_size)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_maxwell_128(void)
|
||||
static NvU64 page_sizes_maxwell_128(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_maxwell_64(void)
|
||||
static NvU64 page_sizes_maxwell_64(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static NvU64 unmapped_pte_maxwell(NvU32 page_size)
|
||||
static NvU64 unmapped_pte_maxwell(NvU64 page_size)
|
||||
{
|
||||
// Setting the privilege bit on an otherwise-zeroed big PTE causes the
|
||||
// corresponding 4k PTEs to be ignored. This allows the invalidation of a
|
||||
@@ -356,7 +357,7 @@ static uvm_mmu_mode_hal_t maxwell_128_mmu_mode_hal =
|
||||
.page_sizes = page_sizes_maxwell_128
|
||||
};
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size)
|
||||
{
|
||||
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
|
||||
if (big_page_size == UVM_PAGE_SIZE_64K)
|
||||
@@ -375,12 +376,6 @@ void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *p
|
||||
UVM_ASSERT_MSG(false, "mmu disable_prefetch_faults called on Maxwell GPU\n");
|
||||
}
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id)
|
||||
{
|
||||
UVM_ASSERT(0);
|
||||
return UVM_MMU_ENGINE_TYPE_COUNT;
|
||||
}
|
||||
|
||||
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id)
|
||||
{
|
||||
UVM_ASSERT(0);
|
||||
|
||||
@@ -290,15 +290,15 @@ uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
|
||||
// Get the mmu mode hal directly as the internal address space tree has not
|
||||
// been created yet.
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
|
||||
NvU32 page_sizes = hal->page_sizes();
|
||||
NvU64 page_sizes = hal->page_sizes();
|
||||
|
||||
return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
|
||||
}
|
||||
|
||||
static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
{
|
||||
NvU32 biggest_page_size;
|
||||
NvU32 chunk_size;
|
||||
NvU64 biggest_page_size;
|
||||
NvU64 chunk_size;
|
||||
|
||||
if (uvm_mem_is_sysmem(mem))
|
||||
return PAGE_SIZE;
|
||||
@@ -315,12 +315,12 @@ static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
// When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
|
||||
// chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
|
||||
if (mem->backing_gpu->mem_info.numa.enabled)
|
||||
chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);
|
||||
chunk_size = max(chunk_size, (NvU64)PAGE_SIZE);
|
||||
|
||||
return chunk_size;
|
||||
}
|
||||
|
||||
static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
|
||||
static NvU64 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
|
||||
{
|
||||
if (uvm_mem_is_vidmem(mem)) {
|
||||
// For vidmem allocations the chunk size is picked out of the supported
|
||||
@@ -467,7 +467,7 @@ static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
NvU64 *dma_addrs;
|
||||
|
||||
UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
|
||||
"mem->chunk_size is 0x%x. PAGE_SIZE is only supported.",
|
||||
"mem->chunk_size is 0x%llx. PAGE_SIZE is only supported.",
|
||||
mem->chunk_size);
|
||||
UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
|
||||
|
||||
@@ -528,10 +528,9 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_pmm_gpu_memory_type_t mem_type;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
@@ -548,23 +547,15 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
|
||||
if (!mem->vidmem.chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// When CC is disabled the behavior is identical to that of PMM, and the
|
||||
// protection flag is ignored (squashed by PMM internally).
|
||||
if (is_unprotected)
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
|
||||
else
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
|
||||
|
||||
status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
mem_type,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
nvstatusToString(status));
|
||||
@@ -574,7 +565,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem)) {
|
||||
gfp_t gfp_flags;
|
||||
@@ -596,7 +587,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
return status;
|
||||
}
|
||||
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
|
||||
return mem_alloc_vidmem_chunks(mem, zero);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
|
||||
@@ -626,7 +617,6 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
NV_STATUS status;
|
||||
NvU64 physical_size;
|
||||
uvm_mem_t *mem = NULL;
|
||||
bool is_unprotected = false;
|
||||
|
||||
UVM_ASSERT(params->size > 0);
|
||||
|
||||
@@ -648,12 +638,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = physical_size / mem->chunk_size;
|
||||
|
||||
if (params->is_unprotected)
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
is_unprotected = params->is_unprotected;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@@ -1050,7 +1035,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
uvm_page_table_range_vec_t **range_vec)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
|
||||
uvm_mem_pte_maker_data_t pte_maker_data = {
|
||||
@@ -1059,7 +1044,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
};
|
||||
|
||||
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx\n", page_size);
|
||||
|
||||
// When the Confidential Computing feature is enabled, DMA allocations are
|
||||
// majoritarily allocated and managed by a per-GPU DMA buffer pool
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -126,12 +126,7 @@ typedef struct
|
||||
//
|
||||
// CPU mappings will always use PAGE_SIZE, so the physical allocation chunk
|
||||
// has to be aligned to PAGE_SIZE.
|
||||
NvU32 page_size;
|
||||
|
||||
// The protection flag is only observed for vidmem allocations when CC is
|
||||
// enabled. If set to true, the allocation returns unprotected vidmem;
|
||||
// otherwise, the allocation returns protected vidmem.
|
||||
bool is_unprotected;
|
||||
NvU64 page_size;
|
||||
|
||||
// If true, the allocation is zeroed (scrubbed).
|
||||
bool zero;
|
||||
@@ -199,7 +194,7 @@ struct uvm_mem_struct
|
||||
size_t chunks_count;
|
||||
|
||||
// Size of each physical chunk (vidmem) or CPU page (sysmem)
|
||||
NvU32 chunk_size;
|
||||
NvU64 chunk_size;
|
||||
|
||||
// Size of the allocation
|
||||
NvU64 size;
|
||||
@@ -329,8 +324,7 @@ uvm_gpu_phys_address_t uvm_mem_gpu_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU6
|
||||
uvm_gpu_address_t uvm_mem_gpu_address_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size);
|
||||
|
||||
// Helper to get an address suitable for accessing_gpu (which may be the backing
|
||||
// GPU) to access with CE. Note that mappings for indirect peers are not
|
||||
// created automatically.
|
||||
// GPU) to access with CE.
|
||||
uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_gpu, NvU64 offset, NvU64 size);
|
||||
|
||||
static bool uvm_mem_is_sysmem(uvm_mem_t *mem)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -33,7 +33,7 @@
|
||||
|
||||
static const size_t sysmem_alloc_sizes[] = { 1, PAGE_SIZE - 1, PAGE_SIZE, 7 * PAGE_SIZE };
|
||||
|
||||
static NvU32 first_page_size(NvU32 page_sizes)
|
||||
static NvU64 first_page_size(NvU64 page_sizes)
|
||||
{
|
||||
return page_sizes & ~(page_sizes - 1);
|
||||
}
|
||||
@@ -43,7 +43,7 @@ static NvU32 first_page_size(NvU32 page_sizes)
|
||||
page_size; \
|
||||
page_size = first_page_size((page_sizes) & ~(page_size | (page_size - 1))))
|
||||
|
||||
static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
|
||||
static inline NV_STATUS mem_alloc_sysmem_and_map_cpu_kernel(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
|
||||
{
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
|
||||
@@ -67,7 +67,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
UVM_ASSERT(uvm_mem_physical_size(mem) >= verif_size);
|
||||
UVM_ASSERT(verif_size >= sizeof(*sys_verif));
|
||||
|
||||
TEST_NV_CHECK_GOTO(__alloc_map_sysmem(verif_size, gpu, &sys_mem), done);
|
||||
TEST_NV_CHECK_GOTO(mem_alloc_sysmem_and_map_cpu_kernel(verif_size, gpu, &sys_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(sys_mem, gpu), done);
|
||||
|
||||
sys_verif = (NvU64*)uvm_mem_get_cpu_addr_kernel(sys_mem);
|
||||
@@ -100,9 +100,9 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
"Memcopy %zd bytes from virtual sys_mem 0x%llx to %s mem 0x%llx [mem loc: %s, page size: %u]",
|
||||
size_this_time,
|
||||
sys_mem_gpu_address.address,
|
||||
mem_gpu_address.is_virtual? "virtual" : "physical",
|
||||
mem_gpu_address.is_virtual ? "virtual" : "physical",
|
||||
mem_gpu_address.address,
|
||||
uvm_mem_is_sysmem(mem)? "sys" : "vid",
|
||||
uvm_mem_is_sysmem(mem) ? "sys" : "vid",
|
||||
mem->chunk_size);
|
||||
|
||||
gpu->parent->ce_hal->memcopy(&push, mem_gpu_address, sys_mem_gpu_address, size_this_time);
|
||||
@@ -140,7 +140,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
"Memcopy %zd bytes from virtual mem 0x%llx to %s sys_mem 0x%llx",
|
||||
size_this_time,
|
||||
mem_gpu_address.address,
|
||||
sys_mem_gpu_address.is_virtual? "virtual" : "physical",
|
||||
sys_mem_gpu_address.is_virtual ? "virtual" : "physical",
|
||||
sys_mem_gpu_address.address);
|
||||
|
||||
gpu->parent->ce_hal->memcopy(&push, sys_mem_gpu_address, mem_gpu_address, size_this_time);
|
||||
@@ -153,7 +153,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
|
||||
for (i = 0; i < verif_size / sizeof(*sys_verif); ++i) {
|
||||
if (sys_verif[i] != mem->size + i) {
|
||||
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%u, processor=%u)\n",
|
||||
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%llu, processor=%u)\n",
|
||||
i,
|
||||
sys_verif[i],
|
||||
(NvU64)(verif_size + i),
|
||||
@@ -241,7 +241,7 @@ static NV_STATUS test_map_cpu(uvm_mem_t *mem)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem;
|
||||
@@ -252,10 +252,9 @@ static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, si
|
||||
params.page_size = page_size;
|
||||
params.mm = current->mm;
|
||||
|
||||
status = uvm_mem_alloc(¶ms, &mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, error);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc(¶ms, &mem), error);
|
||||
|
||||
TEST_CHECK_GOTO(test_map_cpu(mem) == NV_OK, error);
|
||||
TEST_NV_CHECK_GOTO(test_map_cpu(mem), error);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space)
|
||||
TEST_NV_CHECK_GOTO(test_map_gpu(mem, gpu), error);
|
||||
@@ -266,6 +265,7 @@ static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, si
|
||||
|
||||
error:
|
||||
uvm_mem_free(mem);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -299,7 +299,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem;
|
||||
@@ -334,7 +334,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
|
||||
static bool should_test_page_size(size_t alloc_size, NvU64 page_size)
|
||||
{
|
||||
if (g_uvm_global.num_simulated_devices == 0)
|
||||
return true;
|
||||
@@ -352,21 +352,22 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
NvU32 current_alloc = 0;
|
||||
|
||||
// Create allocations of these sizes
|
||||
static const size_t sizes[] = {1, 4, 16, 1024, 4096, 1024 * 1024, 7 * 1024 * 1024 + 17 };
|
||||
static const size_t sizes[] = { 1, 4, 16, 1024, 4096, 1024 * 1024, 7 * 1024 * 1024 + 17 };
|
||||
|
||||
// Pascal+ can map sysmem with 4K, 64K and 2M PTEs, other GPUs can only use
|
||||
// 4K. Test all of the sizes supported by Pascal+ and 128K to match big page
|
||||
// size on pre-Pascal GPUs with 128K big page size.
|
||||
// Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
|
||||
// 2M, we don't test for this page size.
|
||||
static const NvU32 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
// Blackwell+ also supports 256G PTEs and the above holds for this case too.
|
||||
|
||||
static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
|
||||
// All supported page sizes will be tested, CPU has the most with 4 and +1
|
||||
// for the default.
|
||||
static const int max_supported_page_sizes = 4 + 1;
|
||||
int i;
|
||||
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
@@ -386,13 +387,13 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
|
||||
NvU32 page_size = 0;
|
||||
NvU64 page_size = 0;
|
||||
uvm_mem_t *mem;
|
||||
|
||||
if (should_test_page_size(sizes[i], UVM_PAGE_SIZE_DEFAULT)) {
|
||||
status = test_alloc_sysmem(va_space, UVM_PAGE_SIZE_DEFAULT, sizes[i], &mem);
|
||||
if (status != NV_OK) {
|
||||
UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size default\n", sizes[i], page_size);
|
||||
UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size default\n", sizes[i]);
|
||||
goto cleanup;
|
||||
}
|
||||
all_mem[current_alloc++] = mem;
|
||||
@@ -404,14 +405,14 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
|
||||
status = test_alloc_sysmem(va_space, page_size, sizes[i], &mem);
|
||||
if (status != NV_OK) {
|
||||
UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size %u\n", sizes[i], page_size);
|
||||
UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size %llu\n", sizes[i], page_size);
|
||||
goto cleanup;
|
||||
}
|
||||
all_mem[current_alloc++] = mem;
|
||||
}
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
NvU32 page_sizes = gpu->address_space_tree.hal->page_sizes();
|
||||
NvU64 page_sizes = gpu->address_space_tree.hal->page_sizes();
|
||||
|
||||
UVM_ASSERT(max_supported_page_sizes >= hweight_long(page_sizes));
|
||||
|
||||
@@ -428,7 +429,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
for_each_page_size(page_size, page_sizes) {
|
||||
status = test_alloc_vidmem(gpu, page_size, sizes[i], &mem);
|
||||
if (status != NV_OK) {
|
||||
UVM_TEST_PRINT("Test alloc vidmem failed, page_size %u size %zd GPU %s\n",
|
||||
UVM_TEST_PRINT("Test alloc vidmem failed, page_size %llu size %zd GPU %s\n",
|
||||
page_size,
|
||||
sizes[i],
|
||||
uvm_gpu_name(gpu));
|
||||
@@ -461,17 +462,17 @@ cleanup:
|
||||
static NV_STATUS test_basic_vidmem(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 page_size;
|
||||
NvU32 page_sizes = gpu->address_space_tree.hal->page_sizes();
|
||||
NvU32 biggest_page_size = uvm_mmu_biggest_page_size_up_to(&gpu->address_space_tree, UVM_CHUNK_SIZE_MAX);
|
||||
NvU32 smallest_page_size = page_sizes & ~(page_sizes - 1);
|
||||
NvU64 page_size;
|
||||
NvU64 page_sizes = gpu->address_space_tree.hal->page_sizes();
|
||||
NvU64 biggest_page_size = uvm_mmu_biggest_page_size_up_to(&gpu->address_space_tree, UVM_CHUNK_SIZE_MAX);
|
||||
NvU64 smallest_page_size = page_sizes & ~(page_sizes - 1);
|
||||
uvm_mem_t *mem = NULL;
|
||||
|
||||
page_sizes &= UVM_CHUNK_SIZES_MASK;
|
||||
for_each_page_size(page_size, page_sizes) {
|
||||
TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(page_size - 1, gpu, &mem) == NV_OK, done);
|
||||
if (gpu->mem_info.numa.enabled)
|
||||
TEST_CHECK_GOTO(mem->chunk_size >= PAGE_SIZE && mem->chunk_size <= max(page_size, (NvU32)PAGE_SIZE), done);
|
||||
TEST_CHECK_GOTO(mem->chunk_size >= PAGE_SIZE && mem->chunk_size <= max(page_size, (NvU64)PAGE_SIZE), done);
|
||||
else
|
||||
TEST_CHECK_GOTO(mem->chunk_size < page_size || page_size == smallest_page_size, done);
|
||||
uvm_mem_free(mem);
|
||||
@@ -479,14 +480,14 @@ static NV_STATUS test_basic_vidmem(uvm_gpu_t *gpu)
|
||||
|
||||
TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(page_size, gpu, &mem) == NV_OK, done);
|
||||
if (gpu->mem_info.numa.enabled)
|
||||
TEST_CHECK_GOTO(mem->chunk_size == max(page_size, (NvU32)PAGE_SIZE), done);
|
||||
TEST_CHECK_GOTO(mem->chunk_size == max(page_size, (NvU64)PAGE_SIZE), done);
|
||||
else
|
||||
TEST_CHECK_GOTO(mem->chunk_size == page_size, done);
|
||||
uvm_mem_free(mem);
|
||||
mem = NULL;
|
||||
}
|
||||
|
||||
TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(5 * ((NvU64)biggest_page_size) - 1, gpu, &mem) == NV_OK, done);
|
||||
TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(5 * biggest_page_size - 1, gpu, &mem) == NV_OK, done);
|
||||
TEST_CHECK_GOTO(mem->chunk_size == biggest_page_size, done);
|
||||
|
||||
done:
|
||||
@@ -494,41 +495,6 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic_vidmem_unprotected(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_mem_t *mem = NULL;
|
||||
|
||||
uvm_mem_alloc_params_t params = { 0 };
|
||||
params.size = UVM_PAGE_SIZE_4K;
|
||||
params.backing_gpu = gpu;
|
||||
params.page_size = UVM_PAGE_SIZE_4K;
|
||||
|
||||
// If CC is enabled, the protection flag is observed. Because currently all
|
||||
// vidmem is in the protected region, the allocation should succeed.
|
||||
//
|
||||
// If CC is disabled, the protection flag is ignored.
|
||||
params.is_unprotected = false;
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc(&params, &mem));
|
||||
|
||||
uvm_mem_free(mem);
|
||||
mem = NULL;
|
||||
|
||||
// If CC is enabled, the allocation should fail because currently the
|
||||
// unprotected region is empty.
|
||||
//
|
||||
// If CC is disabled, the behavior should be identical to that of a
|
||||
// protected allocation.
|
||||
params.is_unprotected = true;
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
TEST_CHECK_RET(uvm_mem_alloc(&params, &mem) == NV_ERR_NO_MEMORY);
|
||||
else
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc(&params, &mem));
|
||||
|
||||
uvm_mem_free(mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic_sysmem(void)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -604,6 +570,135 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS check_huge_page_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem, NvU64 offset)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_mem_t *sys_mem = NULL;
|
||||
uvm_push_t push;
|
||||
NvU64 *sys_verif;
|
||||
NvU64 *expected_value;
|
||||
NvU64 verif_size = mem->size;
|
||||
uvm_gpu_address_t mem_gpu_address, sys_mem_gpu_address;
|
||||
|
||||
UVM_ASSERT(uvm_mem_physical_size(mem) >= verif_size);
|
||||
|
||||
TEST_NV_CHECK_GOTO(mem_alloc_sysmem_and_map_cpu_kernel(verif_size, gpu, &sys_mem), done);
|
||||
sys_verif = uvm_mem_get_cpu_addr_kernel(sys_mem);
|
||||
memset(sys_verif, 0x0, mem->size);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(sys_mem, gpu), done);
|
||||
|
||||
mem_gpu_address = uvm_gpu_address_virtual(offset);
|
||||
sys_mem_gpu_address = uvm_mem_gpu_address_virtual_kernel(sys_mem, gpu);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_GPU_TO_CPU,
|
||||
&push,
|
||||
"Memcopy %llu bytes from virtual mem 0x%llx to virtual sys_mem 0x%llx",
|
||||
verif_size,
|
||||
mem_gpu_address.address,
|
||||
sys_mem_gpu_address.address),
|
||||
done);
|
||||
|
||||
gpu->parent->ce_hal->memcopy(&push, sys_mem_gpu_address, mem_gpu_address, verif_size);
|
||||
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), done);
|
||||
|
||||
expected_value = uvm_mem_get_cpu_addr_kernel(mem);
|
||||
TEST_CHECK_GOTO(memcmp(sys_verif, expected_value, verif_size) == 0, done);
|
||||
|
||||
done:
|
||||
uvm_mem_free(sys_mem);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NvU64 test_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *phys_addr)
|
||||
{
|
||||
uvm_page_tree_t *tree = range_vec->tree;
|
||||
uvm_gpu_phys_address_t phys = uvm_gpu_phys_address(UVM_APERTURE_SYS, (NvU64)phys_addr);
|
||||
|
||||
return tree->hal->make_pte(phys.aperture, phys.address, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
|
||||
}
|
||||
|
||||
static NV_STATUS test_huge_page_size(uvm_va_space_t *va_space, uvm_gpu_t *gpu, NvU64 page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_mem_t *mem = NULL;
|
||||
size_t size = PAGE_SIZE;
|
||||
NvU64 *cpu_addr;
|
||||
NvU64 huge_gpu_va;
|
||||
NvU64 gpu_phys_addr;
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
NvU8 value = 0xA5;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
TEST_NV_CHECK_GOTO(mem_alloc_sysmem_and_map_cpu_kernel(size, gpu, &mem), cleanup);
|
||||
cpu_addr = uvm_mem_get_cpu_addr_kernel(mem);
|
||||
memset(cpu_addr, value, mem->size);
|
||||
|
||||
// Map it on the GPU (uvm_mem base area), it creates GPU physical address
|
||||
// for the sysmem mapping.
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(mem, gpu), cleanup);
|
||||
|
||||
huge_gpu_va = UVM_ALIGN_UP(gpu->parent->uvm_mem_va_base + gpu->parent->uvm_mem_va_size, page_size);
|
||||
TEST_CHECK_GOTO(IS_ALIGNED(huge_gpu_va, page_size), cleanup);
|
||||
TEST_CHECK_GOTO((huge_gpu_va + page_size) < (1ull << gpu->address_space_tree.hal->num_va_bits()), cleanup);
|
||||
|
||||
// Manually mapping huge_gpu_va because page_size is larger than the largest
|
||||
// uvm_mem_t chunk/page size, so we don't use uvm_mem_gpu_kernel() helper.
|
||||
TEST_NV_CHECK_GOTO(uvm_page_table_range_vec_create(&gpu->address_space_tree,
|
||||
huge_gpu_va,
|
||||
page_size,
|
||||
page_size,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
&range_vec), cleanup);
|
||||
|
||||
gpu_phys_addr = uvm_mem_gpu_physical(mem, gpu, 0, size).address;
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_page_table_range_vec_write_ptes(range_vec,
|
||||
UVM_MEMBAR_NONE,
|
||||
test_pte_maker,
|
||||
(void *)gpu_phys_addr), cleanup_range);
|
||||
|
||||
// Despite the huge page_size mapping, only PAGE_SIZE is backed by an
|
||||
// allocation "owned" by the test. We compute the offset within the huge page
|
||||
// to verify only this segment.
|
||||
TEST_NV_CHECK_GOTO(check_huge_page_from_gpu(gpu, mem, huge_gpu_va + (gpu_phys_addr % page_size)),
|
||||
cleanup_range);
|
||||
|
||||
cleanup_range:
|
||||
uvm_page_table_range_vec_destroy(range_vec);
|
||||
range_vec = NULL;
|
||||
|
||||
cleanup:
|
||||
uvm_mem_free(mem);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Check the GPU access to memory from a 512MB+ page size mapping.
|
||||
// The test allocates a PAGE_SIZE sysmem page, but uses the GMMU to map a huge
|
||||
// page size area. It maps the allocated page to this area, and uses the CE to
|
||||
// access it, thus, exercising a memory access using a huge page.
|
||||
static NV_STATUS test_huge_pages(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU64 page_sizes = gpu->address_space_tree.hal->page_sizes();
|
||||
NvU64 page_size = 0;
|
||||
|
||||
for_each_page_size(page_size, page_sizes) {
|
||||
if (page_size < UVM_PAGE_SIZE_512M)
|
||||
continue;
|
||||
|
||||
TEST_NV_CHECK_RET(test_huge_page_size(va_space, gpu, page_size));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@@ -613,8 +708,8 @@ static NV_STATUS test_basic(uvm_va_space_t *va_space)
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
TEST_NV_CHECK_RET(test_basic_vidmem(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_sysmem_dma(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_vidmem_unprotected(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_dma_pool(gpu));
|
||||
TEST_NV_CHECK_RET(test_huge_pages(va_space, gpu));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
@@ -214,13 +214,14 @@ static NV_STATUS block_migrate_add_mappings(uvm_va_block_t *va_block,
|
||||
|
||||
NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_migrate_mode_t mode,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_block_context_t *va_block_context = service_context->block_context;
|
||||
NV_STATUS status, tracker_status = NV_OK;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
@@ -229,7 +230,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_va_block_migrate_locked(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
service_context,
|
||||
dest_id,
|
||||
region,
|
||||
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
|
||||
@@ -438,7 +439,7 @@ static void preunmap_multi_block(uvm_va_range_t *va_range,
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
uvm_processor_id_t dest_id,
|
||||
@@ -470,10 +471,11 @@ static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
max(start, va_block->start),
|
||||
min(end, va_block->end));
|
||||
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
|
||||
&va_block_retry,
|
||||
uvm_va_block_migrate_locked(va_block,
|
||||
&va_block_retry,
|
||||
va_block_context,
|
||||
service_context,
|
||||
region,
|
||||
dest_id,
|
||||
mode,
|
||||
@@ -486,7 +488,7 @@ static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
uvm_processor_id_t dest_id,
|
||||
@@ -510,7 +512,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
preunmap_range_end = min(preunmap_range_end - 1, end);
|
||||
|
||||
preunmap_multi_block(va_range,
|
||||
va_block_context,
|
||||
service_context->block_context,
|
||||
preunmap_range_start,
|
||||
preunmap_range_end,
|
||||
dest_id);
|
||||
@@ -520,7 +522,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
}
|
||||
|
||||
status = uvm_va_range_migrate_multi_block(va_range,
|
||||
va_block_context,
|
||||
service_context,
|
||||
preunmap_range_start,
|
||||
preunmap_range_end,
|
||||
dest_id,
|
||||
@@ -536,7 +538,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_va_range_t *first_va_range,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
@@ -552,13 +554,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
|
||||
if (!first_va_range) {
|
||||
// For HMM, we iterate over va_blocks since there is no va_range.
|
||||
return uvm_hmm_migrate_ranges(va_space,
|
||||
va_block_context,
|
||||
base,
|
||||
length,
|
||||
dest_id,
|
||||
mode,
|
||||
out_tracker);
|
||||
return uvm_hmm_migrate_ranges(va_space, service_context, base, length, dest_id, mode, out_tracker);
|
||||
}
|
||||
|
||||
UVM_ASSERT(first_va_range == uvm_va_space_iter_first(va_space, base, base));
|
||||
@@ -587,11 +583,13 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
if (!iter.migratable) {
|
||||
// Only return NV_WARN_MORE_PROCESSING_REQUIRED if the pages aren't
|
||||
// already resident at dest_id.
|
||||
if (!uvm_va_policy_preferred_location_equal(policy, dest_id, va_block_context->make_resident.dest_nid))
|
||||
if (!uvm_va_policy_preferred_location_equal(policy,
|
||||
dest_id,
|
||||
service_context->block_context->make_resident.dest_nid))
|
||||
skipped_migrate = true;
|
||||
}
|
||||
else if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, dest_id) &&
|
||||
!uvm_id_equal(dest_id, policy->preferred_location)) {
|
||||
!uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
|
||||
// Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
|
||||
// unless it's the preferred location
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
@@ -599,7 +597,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
}
|
||||
else {
|
||||
status = uvm_va_range_migrate(va_range,
|
||||
va_block_context,
|
||||
service_context,
|
||||
iter.start,
|
||||
iter.end,
|
||||
dest_id,
|
||||
@@ -636,7 +634,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_context_t *va_block_context;
|
||||
uvm_service_block_context_t *service_context;
|
||||
bool do_mappings;
|
||||
bool do_two_passes;
|
||||
bool is_single_block;
|
||||
@@ -654,11 +652,11 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
else if (!first_va_range)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
va_block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!va_block_context)
|
||||
service_context = uvm_service_block_context_alloc(mm);
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
va_block_context->make_resident.dest_nid = dest_nid;
|
||||
service_context->block_context->make_resident.dest_nid = dest_nid;
|
||||
|
||||
// We perform two passes (unless the migration only covers a single VA
|
||||
// block or UVM_MIGRATE_FLAG_SKIP_CPU_MAP is passed). This helps in the
|
||||
@@ -688,7 +686,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
should_do_cpu_preunmap = migration_should_do_cpu_preunmap(va_space, UVM_MIGRATE_PASS_FIRST, is_single_block);
|
||||
|
||||
status = uvm_migrate_ranges(va_space,
|
||||
va_block_context,
|
||||
service_context,
|
||||
first_va_range,
|
||||
base,
|
||||
length,
|
||||
@@ -706,7 +704,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
should_do_cpu_preunmap = migration_should_do_cpu_preunmap(va_space, pass, is_single_block);
|
||||
|
||||
status = uvm_migrate_ranges(va_space,
|
||||
va_block_context,
|
||||
service_context,
|
||||
first_va_range,
|
||||
base,
|
||||
length,
|
||||
@@ -716,7 +714,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
out_tracker);
|
||||
}
|
||||
|
||||
uvm_va_block_context_free(va_block_context);
|
||||
uvm_service_block_context_free(service_context);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -52,10 +52,6 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
|
||||
uvm_gpu_t *owning_gpu = UVM_ID_IS_CPU(resident_id)? NULL: uvm_va_space_get_gpu(va_space, resident_id);
|
||||
const bool can_copy_from = uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(copying_gpu->id)],
|
||||
resident_id);
|
||||
const bool direct_peer = owning_gpu &&
|
||||
(owning_gpu != copying_gpu) &&
|
||||
can_copy_from &&
|
||||
!uvm_gpu_peer_caps(owning_gpu, copying_gpu)->is_indirect_peer;
|
||||
|
||||
UVM_ASSERT(page_index < state->num_pages);
|
||||
|
||||
@@ -65,15 +61,13 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
|
||||
// Local vidmem address
|
||||
*gpu_addr = uvm_gpu_address_copy(owning_gpu, uvm_gpu_page_to_phys_address(owning_gpu, page));
|
||||
}
|
||||
else if (direct_peer) {
|
||||
// Direct GPU peer
|
||||
else if (owning_gpu && can_copy_from) {
|
||||
uvm_gpu_identity_mapping_t *gpu_peer_mappings = uvm_gpu_get_peer_mapping(copying_gpu, owning_gpu->id);
|
||||
uvm_gpu_phys_address_t phys_addr = uvm_gpu_page_to_phys_address(owning_gpu, page);
|
||||
|
||||
*gpu_addr = uvm_gpu_address_virtual(gpu_peer_mappings->base + phys_addr.address);
|
||||
}
|
||||
else {
|
||||
// Sysmem/Indirect Peer
|
||||
NV_STATUS status = uvm_parent_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);
|
||||
|
||||
if (status != NV_OK)
|
||||
@@ -507,7 +501,7 @@ static NV_STATUS migrate_vma_copy_pages(struct vm_area_struct *vma,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
|
||||
static void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
@@ -523,7 +517,7 @@ void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state)
|
||||
static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state)
|
||||
{
|
||||
struct vm_area_struct *vma = args->vma;
|
||||
unsigned long start = args->start;
|
||||
@@ -553,12 +547,13 @@ void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_
|
||||
migrate_vma_cleanup_pages(args->dst, state->num_pages);
|
||||
}
|
||||
|
||||
void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
unsigned long *dst,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
void *private)
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
static void migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
unsigned long *dst,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
void *private)
|
||||
{
|
||||
struct migrate_vma args =
|
||||
{
|
||||
@@ -569,10 +564,11 @@ void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
|
||||
.end = end,
|
||||
};
|
||||
|
||||
uvm_migrate_vma_alloc_and_copy(&args, (migrate_vma_state_t *) private);
|
||||
migrate_vma_alloc_and_copy(&args, (migrate_vma_state_t *) private);
|
||||
}
|
||||
#endif
|
||||
|
||||
void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state)
|
||||
static void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
@@ -642,12 +638,13 @@ void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_stat
|
||||
UVM_ASSERT(!bitmap_intersects(state->populate_pages_mask, state->allocation_failed_mask, state->num_pages));
|
||||
}
|
||||
|
||||
void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
const unsigned long *dst,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
void *private)
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
static void migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
const unsigned long *dst,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
void *private)
|
||||
{
|
||||
struct migrate_vma args =
|
||||
{
|
||||
@@ -660,6 +657,7 @@ void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
|
||||
|
||||
uvm_migrate_vma_finalize_and_map(&args, (migrate_vma_state_t *) private);
|
||||
}
|
||||
#endif
|
||||
|
||||
static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *state)
|
||||
{
|
||||
@@ -668,8 +666,8 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
static const struct migrate_vma_ops uvm_migrate_vma_ops =
|
||||
{
|
||||
.alloc_and_copy = uvm_migrate_vma_alloc_and_copy_helper,
|
||||
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
|
||||
.alloc_and_copy = migrate_vma_alloc_and_copy_helper,
|
||||
.finalize_and_map = migrate_vma_finalize_and_map_helper,
|
||||
};
|
||||
|
||||
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
|
||||
@@ -685,7 +683,7 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
uvm_migrate_vma_alloc_and_copy(args, state);
|
||||
migrate_vma_alloc_and_copy(args, state);
|
||||
if (state->status == NV_OK) {
|
||||
migrate_vma_pages(args);
|
||||
uvm_migrate_vma_finalize_and_map(args, state);
|
||||
|
||||
@@ -150,23 +150,6 @@ struct migrate_vma {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
};
|
||||
|
||||
void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
unsigned long *dst,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
void *private);
|
||||
|
||||
void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
const unsigned long *dst,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
void *private);
|
||||
#else
|
||||
void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state);
|
||||
void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state);
|
||||
#endif // CONFIG_MIGRATE_VMA_HELPER
|
||||
|
||||
// Populates the given VA range and tries to migrate all the pages to dst_id. If
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user