Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-28 03:59:48 +00:00
Compare commits
9 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | 6dd092ddb7 |  |
|  | 4397463e73 |  |
|  | e598191e8e |  |
|  | 1dc88ff75e |  |
|  | 811073c51e |  |
|  | dac2350c7f |  |
|  | 9594cc0169 |  |
|  | 5f40a5aee5 |  |
|  | 758b4ee818 |  |
CHANGELOG.md (55)

@@ -1,7 +1,60 @@
# Changelog

## Release 530 Entries

### [530.41.03] 2023-03-23

### [530.30.02] 2023-02-28

#### Fixed

- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly

#### Added

- Support for power management features like Suspend, Hibernate and Resume.

## Release 525 Entries

### [525.89.02] 2023-02-08

### [525.85.12] 2023-01-30

### [525.85.05] 2023-01-19

#### Fixed

- Fix build problems with Clang 15.0, [#377](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/377) by @ptr1337

### [525.78.01] 2023-01-05

### [525.60.13] 2022-12-05

### [525.60.11] 2022-11-28

#### Fixed

- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)

### [525.53] 2022-11-10

#### Changed

- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.

#### Fixed

- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
- Return EINVAL when [failing to] allocating memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.

## Release 520 Entries

### [520.61.07] 2022-10-20

### [520.56.06] 2022-10-12

#### Added

@@ -29,6 +82,8 @@
- Improved compatibility with new Linux kernel releases
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates

### [515.65.07] 2022-10-19

### [515.65.01] 2022-08-02

#### Fixed

README.md (62)

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 520.56.06.
version 530.41.03.

## How to Build

@@ -15,9 +15,9 @@ as root:

    make modules_install -j$(nproc)

Note that the kernel modules built here must be used with gsp.bin
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
520.56.06 driver release. This can be achieved by installing
530.41.03 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -167,7 +167,7 @@ for the target kernel.

## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 520.56.06 release,
(see the table below). However, in the 530.41.03 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/530.41.03/README/kernel_open.html

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI

@@ -645,13 +645,23 @@ Subsystem Device ID.
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
| NVIDIA PG506-242 | 20B3 10DE 14A7 |
| NVIDIA PG506-243 | 20B3 10DE 14A8 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |

@@ -710,7 +720,13 @@ Subsystem Device ID.
| NVIDIA A10 | 2236 10DE 1482 |
| NVIDIA A10G | 2237 10DE 152F |
| NVIDIA A10M | 2238 10DE 1677 |
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |

@@ -761,6 +777,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |

@@ -773,6 +790,9 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 2050 | 25A7 |
| NVIDIA GeForce RTX 2050 | 25A9 |
| NVIDIA GeForce MX570 A | 25AA |
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
| NVIDIA GeForce RTX 2050 | 25AD |
| NVIDIA A16 | 25B6 10DE 14A9 |
| NVIDIA A2 | 25B6 10DE 157E |
| NVIDIA RTX A2000 Laptop GPU | 25B8 |

@@ -782,5 +802,33 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
| NVIDIA GeForce RTX 2050 | 25ED |
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
| NVIDIA RTX A2000 Embedded GPU | 25FA |
| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
| NVIDIA L40 | 26B5 10DE 169D |
| NVIDIA L40 | 26B5 10DE 17DA |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
| NVIDIA GeForce RTX 4070 Ti | 2782 |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
| NVIDIA L4 | 27B8 10DE 16CA |
| NVIDIA L4 | 27B8 10DE 16EE |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |

@@ -70,9 +70,13 @@ $(foreach _module, $(NV_KERNEL_MODULES), \

EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.56.06\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"530.41.03\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
endif

EXTRA_CFLAGS += -Wno-unused-function

@@ -229,6 +233,7 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \

@@ -243,6 +248,8 @@ NV_HEADER_PRESENCE_TESTS = \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched.h \
linux/sched/mm.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \

@@ -256,6 +263,7 @@ NV_HEADER_PRESENCE_TESTS = \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
linux/host1x-next.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \

@@ -286,7 +294,10 @@ NV_HEADER_PRESENCE_TESTS = \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h
asm/coco.h \
linux/vfio_pci_core.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h

# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.

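Each entry added to NV_HEADER_PRESENCE_TESTS is probed by the build-time configuration test and turned into a preprocessor define that the driver sources can check. A minimal sketch of how such a guard is typically consumed (the exact macro name is an assumption based on the naming pattern used by the other presence guards in this compare, not something shown in this hunk):

    /* Hedged sketch: conftest is assumed to emit NV_LINUX_VFIO_PCI_CORE_H_PRESENT
     * for the new linux/vfio_pci_core.h entry above. */
    #if defined(NV_LINUX_VFIO_PCI_CORE_H_PRESENT)
    #include <linux/vfio_pci_core.h>   /* only included on kernels that ship it */
    #endif
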
@@ -242,7 +242,7 @@
#endif

/* For verification-only features not intended to be included in normal drivers */
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
#if defined(ENABLE_VERIF_FEATURES)
#define NV_VERIF_FEATURES
#endif

@@ -276,12 +276,6 @@
#define NV_IS_MODS 0
#endif

#if defined(NV_GSP_MODS)
#define NV_IS_GSP_MODS 1
#else
#define NV_IS_GSP_MODS 0
#endif

#if defined(NV_WINDOWS)
#define NVOS_IS_WINDOWS 1
#else

kernel-open/common/inc/nv-firmware.h (132, new file)

@@ -0,0 +1,132 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef NV_FIRMWARE_H
#define NV_FIRMWARE_H

#include <nvtypes.h>
#include <nvmisc.h>

typedef enum
{
NV_FIRMWARE_TYPE_GSP,
NV_FIRMWARE_TYPE_GSP_LOG
} nv_firmware_type_t;

typedef enum
{
NV_FIRMWARE_CHIP_FAMILY_NULL = 0,
NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;

static inline const char *nv_firmware_chip_family_to_string(
nv_firmware_chip_family_t fw_chip_family
)
{
switch (fw_chip_family) {
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";

case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return NULL;
}
return NULL;
}

// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
//
// The function nv_firmware_path will then be available.
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
static inline const char *nv_firmware_path(
nv_firmware_type_t fw_type,
nv_firmware_chip_family_t fw_chip_family
)
{
if (fw_type == NV_FIRMWARE_TYPE_GSP)
{
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");

case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");

case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return "";
}
}
else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
{
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");

case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");

case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return "";
}
}

return "";
}
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)

// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
// which will then be invoked (at the top-level) for each
// gsp_*.bin (but not gsp_log_*.bin)
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)

#endif // NV_FIRMWARE_DECLARE_GSP_FILENAME

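The two NV_FIRMWARE_* hooks above only take effect when the including source file defines them before including this header. A minimal sketch of such an includer follows; the path prefix, the MODULE_FIRMWARE usage, and the helper name pick_gsp_firmware are illustrative assumptions, not taken from nv.c:

    /* Hedged sketch of a hypothetical includer of nv-firmware.h. */
    #define NV_FIRMWARE_PATH_FOR_FILENAME(filename) ("nvidia/530.41.03/" filename)
    #define NV_FIRMWARE_DECLARE_GSP_FILENAME(filename) \
        MODULE_FIRMWARE(NV_FIRMWARE_PATH_FOR_FILENAME(filename));   /* assumed use */

    #include "nv-firmware.h"

    static const char *pick_gsp_firmware(nv_firmware_chip_family_t family)
    {
        /* Per nv_firmware_path() above: GA10x/AD10x resolve to gsp_ga10x.bin,
         * Turing/GA100/GH100 resolve to gsp_tu10x.bin, prefixed as defined. */
        return nv_firmware_path(NV_FIRMWARE_TYPE_GSP, family);
    }
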
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
* @key: the key of the objects to iterate over
*/
#define nv_hash_for_each_possible(name, obj, member, key) \
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)

#endif // __NV_HASH_H__

@@ -27,15 +27,13 @@
#include <nv-kernel-interface-api.h>

// Enums for supported hypervisor types.
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
typedef enum _HYPERVISOR_TYPE
{
OS_HYPERVISOR_XEN = 0,
OS_HYPERVISOR_VMWARE,
OS_HYPERVISOR_HYPERV,
OS_HYPERVISOR_KVM,
OS_HYPERVISOR_PARALLELS,
OS_HYPERVISOR_CUSTOM_FORCED,
OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;

@@ -92,30 +90,6 @@ typedef enum VGPU_DEVICE_STATE_E
NV_VGPU_DEV_IN_USE = 2
} VGPU_DEVICE_STATE;

typedef enum _VMBUS_CMD_TYPE
{
VMBUS_CMD_TYPE_INVALID = 0,
VMBUS_CMD_TYPE_SETUP = 1,
VMBUS_CMD_TYPE_SENDPACKET = 2,
VMBUS_CMD_TYPE_CLEANUP = 3,
} VMBUS_CMD_TYPE;

typedef struct
{
NvU32 request_id;
NvU32 page_count;
NvU64 *pPfns;
void *buffer;
NvU32 bufferlen;
} vmbus_send_packet_cmd_params;

typedef struct
{
NvU32 override_sint;
NvU8 *nv_guid;
} vmbus_setup_cmd_params;

/*
* Function prototypes
*/

@@ -104,7 +104,7 @@ typedef struct nv_ioctl_rm_api_version

#define NV_RM_API_VERSION_CMD_STRICT 0
#define NV_RM_API_VERSION_CMD_RELAXED '1'
#define NV_RM_API_VERSION_CMD_OVERRIDE '2'
#define NV_RM_API_VERSION_CMD_QUERY '2'

#define NV_RM_API_VERSION_REPLY_UNRECOGNIZED 0
#define NV_RM_API_VERSION_REPLY_RECOGNIZED 1

@@ -115,11 +115,6 @@ struct nv_kthread_q_item
void *function_args;
};

#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
#else
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
#endif

#ifndef NUMA_NO_NODE
#define NUMA_NO_NODE (-1)

@@ -142,18 +137,12 @@ struct nv_kthread_q_item
//
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
//
// The kernel thread stack is preferably allocated on the specified NUMA node if
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
// fallback to another node is possible because kernel allocators do not
// The kernel thread stack is preferably allocated on the specified NUMA node,
// but fallback to another node is possible because kernel allocators do not
// guarantee affinity. Note that NUMA-affinity applies only to
// the kthread stack. This API does not do anything about limiting the CPU
// affinity of the kthread. That is left to the caller.
//
// On kernels, which do not support NUMA-aware kthread stack allocations
// (NV_KTHTREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
// if the value supplied for 'preferred_node' is anything other than
// NV_KTHREAD_NO_NODE.
//
// Reusing a queue: once a queue is initialized, it must be safely shut down
// (see "Stopping the queue(s)", below), before it can be reused. So, for
// a simple queue use case, the following will work:

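The hunk cuts off just before the usage example the comment refers to. As a rough sketch of the queue-reuse pattern being described, assuming the entry points keep the names used elsewhere in the driver sources (nv_kthread_q_init, nv_kthread_q_item_init, nv_kthread_q_schedule_q_item, nv_kthread_q_stop), none of which are shown in this hunk:

    /* Hedged sketch only; names and signatures are assumptions. */
    static void my_work_cb(void *args)
    {
        /* runs in the queue's kthread context */
    }

    static int demo_queue_reuse(void)
    {
        nv_kthread_q_t q;
        nv_kthread_q_item_t item;
        int ret = nv_kthread_q_init(&q, "nv_demo_q");   /* prefix shows up in ps(1) */
        if (ret != 0)
            return ret;

        nv_kthread_q_item_init(&item, my_work_cb, NULL);
        nv_kthread_q_schedule_q_item(&q, &item);

        nv_kthread_q_stop(&q);                          /* shut down before reuse */
        return nv_kthread_q_init(&q, "nv_demo_q");      /* only now is re-init valid */
    }
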
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -191,13 +191,6 @@
*/
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))

#if !defined(NV_KUID_T_PRESENT)
static inline uid_t __kuid_val(uid_t uid)
{
return uid;
}
#endif

#if defined(CONFIG_VGA_ARB)
#include <linux/vgaarb.h>
#endif

@@ -234,18 +227,6 @@ static inline uid_t __kuid_val(uid_t uid)
#include <asm-generic/pci-dma-compat.h>
#endif

#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
#else
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
#endif
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
#define NV_EFI_ENABLED() efi_enabled
#else
#define NV_EFI_ENABLED() 0
#endif

#if defined(CONFIG_CRAY_XT)
#include <cray/cray_nvidia.h>
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,

@@ -521,7 +502,7 @@ static inline void *nv_vmalloc(unsigned long size)
    return ptr;
}

static inline void nv_vfree(void *ptr, NvU32 size)
static inline void nv_vfree(void *ptr, NvU64 size)
{
    NV_MEMDBG_REMOVE(ptr, size);
    vfree(ptr);

@@ -592,11 +573,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
{
    if (node_id < 0 || node_id >= MAX_NUMNODES)
        return NV_FALSE;
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
    return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
#else
    return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
#endif
}

#define NV_KMALLOC(ptr, size) \

@@ -606,6 +583,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
        NV_MEMDBG_ADD(ptr, size); \
    }

#define NV_KZALLOC(ptr, size) \
    { \
    (ptr) = kzalloc(size, NV_GFP_KERNEL); \
    if (ptr) \
        NV_MEMDBG_ADD(ptr, size); \
    }

#define NV_KMALLOC_ATOMIC(ptr, size) \
    { \
    (ptr) = kmalloc(size, NV_GFP_ATOMIC); \

@@ -649,6 +633,26 @@ static NvBool nv_numa_node_has_memory(int node_id)
        free_pages(ptr, order); \
    }

static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
    /*
     * When AMD memory encryption is enabled, device memory mappings with the
     * C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
     *
     * If cc_mkdec() is present, then pgprot_decrypted() can't be used.
     */
#if defined(NV_CC_MKDEC_PRESENT)
    prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
#else
    prot = pgprot_decrypted(prot);
#endif
#endif

    return prot;
}

#if defined(PAGE_KERNEL_NOENC)
#if defined(__pgprot_mask)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)

@@ -670,7 +674,8 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
#if defined(PAGE_KERNEL_NOENC)
    if (unencrypted)
    {
        prot = cached ? PAGE_KERNEL_NOENC : NV_PAGE_KERNEL_NOCACHE_NOENC;
        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
    }
    else
#endif

@@ -838,10 +843,8 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
    })
#endif

#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
#endif

#define NV_PRINT_AT(nv_debug_level,at) \

@@ -957,26 +960,6 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
    return ret;
}

static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
    /*
     * When AMD memory encryption is enabled, device memory mappings with the
     * C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
     *
     * If cc_mkdec() is present, then pgprot_decrypted() can't be used.
     */
#if defined(NV_CC_MKDEC_PRESENT)
    prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
#else
    prot = pgprot_decrypted(prot);
#endif
#endif

    return prot;
}

static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
    NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
{

@@ -1139,11 +1122,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
    nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
    sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
    if (sp == NULL)
        return -ENOMEM;
    sp->size = sizeof(sp->stack);
    sp->top = sp->stack + sp->size;
    if (rm_is_altstack_in_use())
    {
        sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
        if (sp == NULL)
            return -ENOMEM;
        sp->size = sizeof(sp->stack);
        sp->top = sp->stack + sp->size;
    }
#endif
    *stack = sp;
    return 0;

@@ -1152,7 +1138,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
{
#if defined(NVCPU_X86_64)
    if (stack != NULL)
    if (stack != NULL && rm_is_altstack_in_use())
    {
        NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
    }

@@ -1197,7 +1183,7 @@ typedef struct nv_alloc_s {
    NvBool zeroed : 1;
    NvBool aliased : 1;
    NvBool user : 1;
    NvBool node0 : 1;
    NvBool node : 1;
    NvBool peer_io : 1;
    NvBool physical : 1;
    NvBool unencrypted : 1;

@@ -1211,6 +1197,7 @@ typedef struct nv_alloc_s {
    unsigned int pid;
    struct page **user_pages;
    NvU64 guest_id; /* id of guest VM */
    NvS32 node_id; /* Node id for memory allocation when node is set in flags */
    void *import_priv;
    struct sg_table *import_sgt;
} nv_alloc_t;

@@ -1386,8 +1373,7 @@ typedef struct nv_dma_map_s {
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
* single-page sg elements on Xen Server.
*/
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
    !defined(NV_DOM0_KERNEL_PRESENT)
#if !defined(NV_DOM0_KERNEL_PRESENT)
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
    ((sg_alloc_table_from_pages(&sm->sgt, \
        &dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \

@@ -1452,6 +1438,24 @@ struct nv_dma_device {
    NvBool nvlink;
};

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
/*
* acpi data storage structure
*
* This structure retains the pointer to the device,
* and any other baggage we want to carry along
*
*/
typedef struct
{
    nvidia_stack_t *sp;
    struct acpi_device *device;
    struct acpi_handle *handle;
    void *notifier_data;
    int notify_handler_installed;
} nv_acpi_t;
#endif

/* linux-specific version of old nv_state_t */
/* this is a general os-specific state structure. the first element *must* be
   the general state structure, for the generic unix-based code */

@@ -1546,8 +1550,13 @@ typedef struct nv_linux_state_s {
    /* Per-device notifier block for ACPI events */
    struct notifier_block acpi_nb;

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
    nv_acpi_t* nv_acpi_object;
#endif

    /* Lock serializing ISRs for different SOC vectors */
    nv_spinlock_t soc_isr_lock;
    void *soc_bh_mutex;

    struct nv_timer snapshot_timer;
    nv_spinlock_t snapshot_timer_lock;

@@ -1593,24 +1602,6 @@ extern struct rw_semaphore nv_system_pm_lock;

extern NvBool nv_ats_supported;

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
/*
* acpi data storage structure
*
* This structure retains the pointer to the device,
* and any other baggage we want to carry along
*
*/
typedef struct
{
    nvidia_stack_t *sp;
    struct acpi_device *device;
    struct acpi_handle *handle;
    int notify_handler_installed;
} nv_acpi_t;

#endif

/*
* file-private data
* hide a pointer to our data structures in a file-private ptr

@@ -1667,6 +1658,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t

#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)

static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
    if (rm_is_altstack_in_use())
    {
        down(&nvlfp->fops_sp_lock[which]);
        return nvlfp->fops_sp[which];
    }
#endif
    return NULL;
}

static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
    if (rm_is_altstack_in_use())
    {
        up(&nvlfp->fops_sp_lock[which]);
    }
#endif
}

#define NV_ATOMIC_READ(data) atomic_read(&(data))
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))

@@ -1739,6 +1751,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)

extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_EnableResizableBar;

extern NvU32 num_probed_nv_devices;
extern NvU32 num_nv_devices;

@@ -1895,20 +1908,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
#endif

#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
#else
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
#endif

#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING

NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);

static inline void nv_mutex_destroy(struct mutex *lock)
{

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -73,21 +73,4 @@
    }
#endif

#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
#define nv_hlist_for_each_entry(pos, head, member) \
    hlist_for_each_entry(pos, head, member)
#else
#if !defined(hlist_entry_safe)
#define hlist_entry_safe(ptr, type, member) \
    (ptr) ? hlist_entry(ptr, type, member) : NULL
#endif

#define nv_hlist_for_each_entry(pos, head, member) \
    for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
         pos; \
         pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
#endif
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT

#endif // __NV_LIST_HELPERS_H__

@@ -29,6 +29,25 @@
typedef int vm_fault_t;
#endif

/* pin_user_pages
* Presence of pin_user_pages() also implies the presence of unpin-user_page().
* Both were added in the v5.6-rc1
*
* pin_user_pages() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
*
*/

#include <linux/mm.h>
#include <linux/sched.h>
#if defined(NV_PIN_USER_PAGES_PRESENT)
#define NV_PIN_USER_PAGES pin_user_pages
#define NV_UNPIN_USER_PAGE unpin_user_page
#else
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
#define NV_UNPIN_USER_PAGE put_page
#endif // NV_PIN_USER_PAGES_PRESENT

/* get_user_pages
*
* The 8-argument version of get_user_pages was deprecated by commit

@@ -47,51 +66,57 @@ typedef int vm_fault_t;
*
*/

#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
#define NV_GET_USER_PAGES get_user_pages
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
    get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
    get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
#else
#include <linux/mm.h>
#include <linux/sched.h>

static inline long NV_GET_USER_PAGES(unsigned long start,
                                     unsigned long nr_pages,
                                     int write,
                                     int force,
                                     unsigned int flags,
                                     struct page **pages,
                                     struct vm_area_struct **vmas)
{
    unsigned int flags = 0;
    int write = flags & FOLL_WRITE;
    int force = flags & FOLL_FORCE;

    if (write)
        flags |= FOLL_WRITE;
    if (force)
        flags |= FOLL_FORCE;

#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
    return get_user_pages(current, current->mm, start, nr_pages, flags,
                          pages, vmas);
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
    return get_user_pages(start, nr_pages, write, force, pages, vmas);
#else
    // remaining defination(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
    return get_user_pages(start, nr_pages, flags, pages, vmas);
#endif
    // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
    return get_user_pages(current, current->mm, start, nr_pages, write,
                          force, pages, vmas);
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE
}
#endif
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS

/* pin_user_pages_remote
*
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
*
* pin_user_pages_remote() removed 'tsk' parameter by commit
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
* in v5.9-rc1 (2020-08-11). *
*
*/

#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
#if defined (NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
    pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#else
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK
#else
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT

/*
* get_user_pages_remote() was added by commit 1e9877902dc7
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
*
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
* functions") deprecated the 8-argument version of get_user_pages for the
* non-remote case (calling get_user_pages with current and current->mm).
*
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
* version that uses something other than current and current->mm. Use
* NV_GET_USER_PAGES if you are refering to current and current->mm.
*
* Note that get_user_pages_remote() requires the caller to hold a reference on
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
* This will always be true when using current and current->mm. If the kernel passes

@@ -113,66 +138,55 @@ typedef int vm_fault_t;
*/

#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote

#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
    get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)

#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
    get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)

#else
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
                                             struct mm_struct *mm,
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
                                             unsigned long start,
                                             unsigned long nr_pages,
                                             int write,
                                             int force,
                                             unsigned int flags,
                                             struct page **pages,
                                             struct vm_area_struct **vmas)
                                             struct vm_area_struct **vmas,
                                             int *locked)
{
    unsigned int flags = 0;
    int write = flags & FOLL_WRITE;
    int force = flags & FOLL_FORCE;

    if (write)
        flags |= FOLL_WRITE;
    if (force)
        flags |= FOLL_FORCE;

#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
    return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
    return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
                                 pages, vmas);
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
    return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
                                 pages, vmas, NULL);
#else
    // remaining defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
    return get_user_pages_remote(mm, start, nr_pages, flags,
                                 pages, vmas, NULL);
#endif
}
#endif
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
#else
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES_REMOTE get_user_pages
#else
#include <linux/mm.h>
#include <linux/sched.h>

static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
                                             struct mm_struct *mm,
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
                                             unsigned long start,
                                             unsigned long nr_pages,
                                             int write,
                                             int force,
                                             unsigned int flags,
                                             struct page **pages,
                                             struct vm_area_struct **vmas)
                                             struct vm_area_struct **vmas,
                                             int *locked)
{
    unsigned int flags = 0;
    int write = flags & FOLL_WRITE;
    int force = flags & FOLL_FORCE;

    if (write)
        flags |= FOLL_WRITE;
    if (force)
        flags |= FOLL_FORCE;

    return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
    return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
}
#endif
#endif

#else
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
    get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT

/*
* The .virtual_address field was effectively renamed to .address, by these

@@ -247,4 +261,22 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
#endif
}

static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
    vm_flags_set(vma, flags);
#else
    vma->vm_flags |= flags;
#endif
}

static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
    vm_flags_clear(vma, flags);
#else
    vma->vm_flags &= ~flags;
#endif
}

#endif // __NV_MM_H__

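A small sketch of how the pin/unpin wrappers above are typically used to pin a user buffer from process context; the surrounding function, flag choice, and error handling are illustrative assumptions, not taken from this diff. The five-argument form matches both pin_user_pages() on the kernels this header targets and the get_user_pages() fallback:

    /* Hedged sketch (not from this diff): pin nr_pages of a user buffer.
     * The caller is assumed to hold the mm's mmap lock (see nv_mmap_get_lock()
     * in this header for where that lock lives across kernel versions). */
    static long demo_pin_user_buffer(unsigned long uaddr, unsigned long nr_pages,
                                     struct page **pages)
    {
        long got = NV_PIN_USER_PAGES(uaddr & PAGE_MASK, nr_pages, FOLL_WRITE,
                                     pages, NULL);
        if (got < 0)
            return got;

        if ((unsigned long)got < nr_pages) {
            while (got-- > 0)
                NV_UNPIN_USER_PAGE(pages[got]);   /* undo a partial pin */
            return -EFAULT;
        }
        return 0;
    }

On kernels that predate pin_user_pages(), NV_PIN_USER_PAGES falls back to NV_GET_USER_PAGES and NV_UNPIN_USER_PAGE falls back to put_page(), so the same call site works either way.
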
@@ -27,16 +27,8 @@
#include <linux/pci.h>
#include "nv-linux.h"

#if defined(NV_DEV_IS_PCI_PRESENT)
#define nv_dev_is_pci(dev) dev_is_pci(dev)
#else
/*
* Non-PCI devices are only supported on kernels which expose the
* dev_is_pci() function. For older kernels, we only support PCI
* devices, hence returning true to take all the PCI code paths.
*/
#define nv_dev_is_pci(dev) (true)
#endif
#define NV_GPU_BAR1 1
#define NV_GPU_BAR3 3

int nv_pci_register_driver(void);
void nv_pci_unregister_driver(void);

@@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)

#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#if defined(NV_MT_DEVICE_GRE_PRESENT)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
        PTE_ATTRINDX(MT_DEVICE_GRE))
#else
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
        PTE_ATTRINDX(MT_DEVICE_nGnRE))
#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
    __pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)

@@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
        __entry; \
    })

/*
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
* Use the older interface instead unless the newer interface is necessary.
*/
#if defined(NV_PROC_REMOVE_PRESENT)
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
    proc_mkdir_mode(name, mode, parent)
#else
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
    ({ \
        struct proc_dir_entry *__entry; \
        __entry = create_proc_entry(name, mode, parent); \
        __entry; \
    })
#endif

#define NV_CREATE_PROC_DIR(name,parent) \
    ({ \

@@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
#define NV_PDE_DATA(inode) PDE_DATA(inode)
#endif

#if defined(NV_PROC_REMOVE_PRESENT)
# define NV_REMOVE_PROC_ENTRY(entry) \
    proc_remove(entry);
#else
# define NV_REMOVE_PROC_ENTRY(entry) \
    remove_proc_entry(entry->name, entry->parent);
#endif

void nv_procfs_unregister_all(struct proc_dir_entry *entry,
                              struct proc_dir_entry *delimiter);
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
    static int nv_procfs_open_##name( \
        struct inode *inode, \

@@ -54,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
void nv_free_system_pages (nv_alloc_t *);

void nv_address_space_init_once (struct address_space *mapping);

int nv_uvm_init (void);
void nv_uvm_exit (void);
NV_STATUS nv_uvm_suspend (void);

@@ -40,6 +40,7 @@
#include <nvstatus.h>
#include "nv_stdarg.h"
#include <nv-caps.h>
#include <nv-firmware.h>
#include <nv-ioctl.h>
#include <nvmisc.h>

@@ -160,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
    TEGRASOC_WHICH_CLK_MAUD,
    TEGRASOC_WHICH_CLK_AZA_2XBIT,
    TEGRASOC_WHICH_CLK_AZA_BIT,
    TEGRA234_CLK_MIPI_CAL,
    TEGRA234_CLK_UART_FST_MIPI_CAL,
    TEGRASOC_WHICH_CLK_MIPI_CAL,
    TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
    TEGRASOC_WHICH_CLK_SOR0_DIV,
    TEGRASOC_WHICH_CLK_DISP_ROOT,
    TEGRASOC_WHICH_CLK_HUB_ROOT,
    TEGRASOC_WHICH_CLK_PLLA_DISP,
    TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
    TEGRASOC_WHICH_CLK_PLLA,
    TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
} TEGRASOC_WHICH_CLK;

@@ -304,10 +311,11 @@ typedef struct nv_alloc_mapping_context_s {

typedef enum
{
    NV_SOC_IRQ_DISPLAY_TYPE,
    NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
    NV_SOC_IRQ_DPAUX_TYPE,
    NV_SOC_IRQ_GPIO_TYPE,
    NV_SOC_IRQ_HDACODEC_TYPE,
    NV_SOC_IRQ_TCPC2DISP_TYPE,
    NV_SOC_IRQ_INVALID_TYPE
} nv_soc_irq_type_t;

@@ -322,6 +330,7 @@ typedef struct nv_soc_irq_info_s {
    NvU32 gpio_num;
    NvU32 dpaux_instance;
    } irq_data;
    NvS32 ref_count;
} nv_soc_irq_info_t;

#define NV_MAX_SOC_IRQS 6

@@ -368,6 +377,7 @@ typedef struct nv_state_t
    nv_aperture_t *mipical_regs;
    nv_aperture_t *fb, ud;
    nv_aperture_t *simregs;
    nv_aperture_t *emc_regs;

    NvU32 num_dpaux_instance;
    NvU32 interrupt_line;

@@ -376,9 +386,11 @@ typedef struct nv_state_t
    NvS32 current_soc_irq;
    NvU32 num_soc_irqs;
    NvU32 hdacodec_irq;
    NvU32 tcpc2disp_irq;
    NvU8 *soc_dcb_blob;
    NvU32 soc_dcb_size;
    NvU32 disp_sw_soc_chip_id;
    NvBool soc_is_dpalt_mode_supported;

    NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
    NvU32 igpu_nonstall_irq;

@@ -430,9 +442,6 @@ typedef struct nv_state_t
    /* Variable to force allocation of 32-bit addressable memory */
    NvBool force_dma32_alloc;

    /* Variable to track if device has entered dynamic power state */
    NvBool dynamic_power_entered;

    /* PCI power state should be D0 during system suspend */
    NvBool d0_state_in_suspend;

@@ -465,6 +474,9 @@ typedef struct nv_state_t
    /* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
    NvBool nvpcf_dsm_in_gpu_scope;

    /* Bool to check if the device received a shutdown notification */
    NvBool is_shutdown;

} nv_state_t;

// These define need to be in sync with defines in system.h

@@ -473,6 +485,10 @@ typedef struct nv_state_t
#define OS_TYPE_SUNOS 0x3
#define OS_TYPE_VMWARE 0x4

#define NVFP_TYPE_NONE 0x0
#define NVFP_TYPE_REFCOUNTED 0x1
#define NVFP_TYPE_REGISTERED 0x2

struct nv_file_private_t
{
    NvHandle *handles;

@@ -482,6 +498,7 @@ struct nv_file_private_t

    nv_file_private_t *ctl_nvfp;
    void *ctl_nvfp_priv;
    NvU32 register_or_refcount;
};

// Forward define the gpu ops structures

@@ -513,8 +530,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);

/*
* flags

@@ -566,12 +584,6 @@ typedef enum
    NV_POWER_STATE_RUNNING
} nv_power_state_t;

typedef enum
{
    NV_FIRMWARE_GSP,
    NV_FIRMWARE_GSP_LOG
} nv_firmware_t;

#define NV_PRIMARY_VGA(nv)      ((nv)->primary_vga)

#define NV_IS_CTL_DEVICE(nv)    ((nv)->flags & NV_FLAG_CONTROL)

@@ -587,12 +599,6 @@ typedef enum
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
    ((nv)->iso_iommu_present)

/*
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
* AC/DC event.
*/
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT  0x8002

/*
* GPU add/remove events
*/

@@ -604,8 +610,6 @@ typedef enum
* to core NVIDIA driver for ACPI events.
*/
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT    0
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC            0
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY       1
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED       0
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED         1

@@ -616,14 +620,18 @@ typedef enum
#define NV_EVAL_ACPI_METHOD_NVIF     0x01
#define NV_EVAL_ACPI_METHOD_WMMX     0x02

#define NV_I2C_CMD_READ 1
#define NV_I2C_CMD_WRITE 2
#define NV_I2C_CMD_SMBUS_READ 3
#define NV_I2C_CMD_SMBUS_WRITE 4
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
typedef enum {
    NV_I2C_CMD_READ = 1,
    NV_I2C_CMD_WRITE,
    NV_I2C_CMD_SMBUS_READ,
    NV_I2C_CMD_SMBUS_WRITE,
    NV_I2C_CMD_SMBUS_QUICK_WRITE,
    NV_I2C_CMD_SMBUS_QUICK_READ,
    NV_I2C_CMD_SMBUS_BLOCK_READ,
    NV_I2C_CMD_SMBUS_BLOCK_WRITE,
    NV_I2C_CMD_BLOCK_READ,
    NV_I2C_CMD_BLOCK_WRITE
} nv_i2c_cmd_t;

// Flags needed by OSAllocPagesNode
#define NV_ALLOC_PAGES_NODE_NONE 0x0

@@ -636,27 +644,38 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
    (nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)

#define IS_REG_OFFSET(nv, offset, length) \
    (((offset) >= (nv)->regs->cpu_address) && \
    (((offset) + ((length)-1)) <= \
    (nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
    return ((offset >= nv->regs->cpu_address) &&
            ((offset + (length - 1)) >= offset) &&
            ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}

#define IS_FB_OFFSET(nv, offset, length) \
    (((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
    (((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
    return ((nv->fb) && (nv->fb->size != 0) &&
            (offset >= nv->fb->cpu_address) &&
            ((offset + (length - 1)) >= offset) &&
            ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}

#define IS_UD_OFFSET(nv, offset, length) \
    (((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
    ((offset) >= (nv)->ud.cpu_address) && \
    (((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
    return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
            (offset >= nv->ud.cpu_address) &&
            ((offset + (length - 1)) >= offset) &&
            ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}

#define IS_IMEM_OFFSET(nv, offset, length) \
    (((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
    ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
    ((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
    (((offset) + ((length) - 1)) <= \
    (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
    ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
    return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
            (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
            (offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
            ((offset + (length - 1)) >= offset) &&
            ((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
                                         (nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}

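One detail worth noting in the inline rewrites above: each new helper adds an ((offset + (length - 1)) >= offset) test that the old macros lacked. For unsigned 64-bit arithmetic this rejects ranges whose end wraps past the top of the address space. A tiny standalone illustration with hypothetical values (not values from the driver):

    /* Hedged illustration of the wraparound guard; compile with any C compiler. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t offset = 0xFFFFFFFFFFFFF000ULL;  /* near the top of the range */
        uint64_t length = 0x2000;                 /* large enough to wrap */
        uint64_t end = offset + (length - 1);     /* wraps to a small value */

        /* Prints 0: end < offset, so the range is rejected as invalid. */
        printf("end >= offset? %d\n", end >= offset);
        return 0;
    }
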
#define NV_RM_MAX_MSIX_LINES 8

@@ -725,7 +744,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );

NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);

NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);

@@ -787,7 +806,7 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);

NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
void NV_API_CALL nv_put_firmware(const void *);

nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);

@@ -828,6 +847,7 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
int NV_API_CALL nv_cap_drv_init(void);
void NV_API_CALL nv_cap_drv_exit(void);
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);

NvU32 NV_API_CALL nv_get_os_type(void);

@@ -916,11 +936,11 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,

void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);

NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);

NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);

void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);

@@ -943,7 +963,7 @@ NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);

@@ -969,12 +989,14 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);

void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);

NvBool NV_API_CALL rm_is_altstack_in_use(void);

/* vGPU VFIO specific functions */
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
@@ -987,6 +1009,10 @@ NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_acces
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
|
||||
|
||||
#if defined(NV_VMWARE)
|
||||
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
|
||||
#endif
|
||||
|
||||
/* Callbacks should occur roughly every 10ms. */
|
||||
#define NV_SNAPSHOT_TIMER_HZ 100
|
||||
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
|
||||
@@ -998,6 +1024,16 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
|
||||
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
|
||||
}
|
||||
|
||||
/* nano second resolution timer callback structure */
|
||||
typedef struct nv_nano_timer nv_nano_timer_t;
|
||||
|
||||
/* nano timer functions */
|
||||
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
|
||||
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
|
||||
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
|
||||
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
|
||||
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
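The nano-timer declarations above are new in this release. A hedged sketch of the expected lifecycle, using only the signatures declared here; the caller context (`nv`, `pTmrEvent`) and the 1 ms period are hypothetical:

/*
 * Hypothetical lifecycle for the nano-timer interface declared above.
 * rm_run_nano_timer_callback() is what RM invokes when the timer fires.
 */
static void example_nano_timer(nv_state_t *nv, void *pTmrEvent)
{
    nv_nano_timer_t *timer = NULL;

    nv_create_nano_timer(nv, pTmrEvent, &timer);
    nv_start_nano_timer(nv, timer, 1000000 /* 1 ms, expressed in ns */);

    /* ... timer expiry eventually reaches rm_run_nano_timer_callback() ... */

    nv_cancel_nano_timer(nv, timer);
    nv_destroy_nano_timer(nv, timer);
}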
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
|
||||
static inline NvU64 nv_rdtsc(void)
|
||||
|
||||
@@ -331,10 +331,14 @@ typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
|
||||
                                              NvU64 *pPages,
                                              NvU32 count,
                                              NvU64 physBegin,
                                              NvU64 physEnd);
                                              NvU64 physEnd,
                                              UVM_PMA_GPU_MEMORY_TYPE mem_type);

// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
                                              NvU64 physBegin,
                                              NvU64 physEnd,
                                              UVM_PMA_GPU_MEMORY_TYPE mem_type);
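Both PMA eviction callback typedefs gain a UVM_PMA_GPU_MEMORY_TYPE argument (the enum with UNPROTECTED/PROTECTED values added later in this diff), so existing implementations have to be updated to the new shape. A hypothetical stub, only to show the updated signature; the function name is made up and the real callbacks are registered via nvUvmInterfacePmaRegisterEvictionCallbacks():

static NV_STATUS myEvictRangeCb(void *callbackData,
                                NvU64 physBegin,
                                NvU64 physEnd,
                                UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
    (void)callbackData;
    (void)physBegin;
    (void)physEnd;
    (void)mem_type;   /* new parameter: protected vs. unprotected memory */
    return NV_OK;
}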
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfacePmaRegisterEvictionCallbacks
|
||||
@@ -917,6 +921,23 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
void *pFaultBuffer,
|
||||
NvU32 *numFaults);
|
||||
|
||||
/*******************************************************************************
    nvUvmInterfaceFlushReplayableFaultBuffer

    This function sends an RPC to GSP in order to flush the HW replayable fault buffer.

    NOTES:
    - This function DOES NOT acquire the RM API or GPU locks. That is because
      it is called during fault servicing, which could produce deadlocks.

    Arguments:
        device[IN]     - Device handle associated with the gpu

    Error codes:
        NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
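Since this new entry point intentionally takes no RM API or GPU locks, locking is the caller's responsibility. A hypothetical call site (function name and surrounding flow are illustrative, not part of the diff):

/*
 * After draining the replayable fault buffer during fault servicing, ask
 * GSP to flush any entries still in flight. See the NOTES above regarding
 * locking.
 */
static void service_replayable_faults(uvmGpuDeviceHandle device)
{
    NV_STATUS status = nvUvmInterfaceFlushReplayableFaultBuffer(device);

    if (status != NV_OK) {
        /* NV_ERR_INVALID_ARGUMENT is the documented failure mode */
    }
}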
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceInitAccessCntrInfo
|
||||
|
||||
@@ -1050,11 +1071,13 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
hMemory[IN] - Memory handle.
|
||||
offset [IN] - Offset from the beginning of the allocation
|
||||
where PTE mappings should begin.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size [IN] - Length of the allocation for which PTEs
|
||||
should be built.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size = 0 will be interpreted as the total size
|
||||
of the allocation.
|
||||
@@ -1360,8 +1383,6 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
|
||||
a. pre-allocated stack
|
||||
b. the fact that internal RPC infrastructure doesn't acquire GPU lock.
|
||||
Therefore, locking is the caller's responsibility.
|
||||
- This function DOES NOT sleep (does not allocate memory or acquire locks)
|
||||
so it can be invoked while holding a spinlock.
|
||||
|
||||
Arguments:
|
||||
channel[IN] - paging channel handle obtained via
|
||||
|
||||
@@ -110,7 +110,7 @@ typedef struct UvmGpuMemoryInfo_tag
|
||||
NvBool deviceDescendant;
|
||||
|
||||
// Out: Page size associated with the phys alloc.
|
||||
NvU32 pageSize;
|
||||
NvU64 pageSize;
|
||||
|
||||
// Out: Set to TRUE, if the allocation is contiguous.
|
||||
NvBool contig;
|
||||
@@ -306,6 +306,7 @@ typedef struct UvmGpuChannelAllocParams_tag
|
||||
|
||||
// interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
|
||||
} UvmGpuChannelAllocParams;
|
||||
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag
|
||||
@@ -371,7 +372,6 @@ typedef enum
|
||||
UVM_LINK_TYPE_NVLINK_2,
|
||||
UVM_LINK_TYPE_NVLINK_3,
|
||||
UVM_LINK_TYPE_NVLINK_4,
|
||||
UVM_LINK_TYPE_C2C,
|
||||
} UVM_LINK_TYPE;
|
||||
|
||||
typedef struct UvmGpuCaps_tag
|
||||
@@ -409,7 +409,7 @@ typedef struct UvmGpuCaps_tag
|
||||
|
||||
typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
{
|
||||
NvU32 bigPageSize;
|
||||
NvU64 bigPageSize;
|
||||
|
||||
NvBool atsEnabled;
|
||||
|
||||
@@ -430,7 +430,7 @@ typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
typedef struct UvmGpuAllocInfo_tag
|
||||
{
|
||||
NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested
|
||||
NvU32 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 alignment; // Virtual alignment
|
||||
NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation
|
||||
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
|
||||
@@ -516,6 +516,13 @@ typedef struct UvmGpuExternalMappingInfo_tag
|
||||
// In: Size of the buffer to store PTEs (in bytes).
|
||||
NvU64 pteBufferSize;
|
||||
|
||||
// In: Page size for mapping
|
||||
// If this field is passed as 0, the page size
|
||||
// of the allocation is used for mapping.
|
||||
// nvUvmInterfaceGetExternalAllocPtes must pass
|
||||
// this field as zero.
|
||||
NvU64 mappingPageSize;
|
||||
|
||||
// In: Pointer to a buffer to store PTEs.
|
||||
// Out: The interface will fill the buffer with PTEs
|
||||
NvU64 *pteBuffer;
|
||||
@@ -826,10 +833,13 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
|
||||
// Preallocated stack for functions called from the UVM isr bottom half
|
||||
void *isr_bh_sp;
|
||||
|
||||
} nonReplayable;
|
||||
NvHandle faultBufferHandle;
|
||||
} UvmGpuFaultInfo;
|
||||
|
||||
struct Device;
|
||||
|
||||
typedef struct UvmGpuPagingChannel_tag
|
||||
{
|
||||
struct gpuDevice *device;
|
||||
@@ -837,6 +847,7 @@ typedef struct UvmGpuPagingChannel_tag
|
||||
NvHandle channelHandle;
|
||||
NvHandle errorNotifierHandle;
|
||||
void *pushStreamSp;
|
||||
struct Device *pDevice;
|
||||
} UvmGpuPagingChannel, *UvmGpuPagingChannelHandle;
|
||||
|
||||
typedef struct UvmGpuAccessCntrInfo_tag
|
||||
@@ -897,6 +908,16 @@ typedef struct UvmGpuAccessCntrConfig_tag
|
||||
NvU32 threshold;
|
||||
} UvmGpuAccessCntrConfig;
|
||||
|
||||
//
|
||||
// When modifying this enum, make sure they are compatible with the mirrored
|
||||
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
|
||||
//
|
||||
typedef enum UvmPmaGpuMemoryType_tag
|
||||
{
|
||||
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
|
||||
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
|
||||
} UVM_PMA_GPU_MEMORY_TYPE;
|
||||
|
||||
typedef UvmGpuChannelInfo gpuChannelInfo;
|
||||
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
|
||||
typedef UvmGpuCaps gpuCaps;
|
||||
|
||||
@@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|
||||
|* *|
|
||||
\***************************************************************************/
|
||||
|
||||
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
|
||||
typedef NvU64 NvOffset; /* GPU address */
|
||||
#endif
|
||||
|
||||
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
|
||||
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <nvlimits.h>
|
||||
|
||||
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
|
||||
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
|
||||
|
||||
#define NVKMS_LEFT 0
|
||||
#define NVKMS_RIGHT 1
|
||||
@@ -530,4 +531,78 @@ typedef struct {
|
||||
NvBool noncoherent;
|
||||
} NvKmsDispIOCoherencyModes;
|
||||
|
||||
enum NvKmsInputColorSpace {
    /* Unknown colorspace; no de-gamma will be applied */
    NVKMS_INPUT_COLORSPACE_NONE = 0,

    /* Linear, Rec.709 [-0.5, 7.5) */
    NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,

    /* PQ, Rec.2020 unity */
    NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
};

enum NvKmsOutputTf {
    /*
     * NVKMS itself won't apply any OETF (clients are still
     * free to provide a custom OLUT)
     */
    NVKMS_OUTPUT_TF_NONE = 0,
    NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
    NVKMS_OUTPUT_TF_PQ = 2,
};
|
||||
|
||||
/*!
|
||||
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
|
||||
* This is expected to match exactly with the spec.
|
||||
*/
|
||||
struct NvKmsHDRStaticMetadata {
|
||||
/*!
|
||||
* Color primaries of the data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} displayPrimaries[3];
|
||||
|
||||
/*!
|
||||
* White point of colorspace data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} whitePoint;
|
||||
|
||||
/**
|
||||
* Maximum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Minimum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of
|
||||
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
|
||||
* represents 6.5535 cd/m2.
|
||||
*/
|
||||
NvU16 minDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Maximum content light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxCLL;
|
||||
|
||||
/*!
|
||||
* Maximum frame-average light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxFALL;
|
||||
};
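The field comments above define the fixed-point encodings: chromaticities in units of 0.00002 (0xC350 = 1.0), max luminance and light levels in 1 cd/m2 units, min luminance in 0.0001 cd/m2 units. A sketch of how a client might fill the struct; the numbers are illustrative (a BT.2020-style mastering display with a D65 white point), not values mandated by this diff:

/* 0.708 / 0.00002 = 35400, 0.3127 / 0.00002 = 15635, and so on. */
static void fill_example_hdr_metadata(struct NvKmsHDRStaticMetadata *m)
{
    m->displayPrimaries[0].x = 35400;  /* R x = 0.708   */
    m->displayPrimaries[0].y = 14600;  /* R y = 0.292   */
    m->displayPrimaries[1].x = 8500;   /* G x = 0.170   */
    m->displayPrimaries[1].y = 39850;  /* G y = 0.797   */
    m->displayPrimaries[2].x = 6550;   /* B x = 0.131   */
    m->displayPrimaries[2].y = 2300;   /* B y = 0.046   */
    m->whitePoint.x          = 15635;  /* D65 x = 0.3127 */
    m->whitePoint.y          = 16450;  /* D65 y = 0.3290 */

    m->maxDisplayMasteringLuminance = 1000;  /* 1000 cd/m^2                  */
    m->minDisplayMasteringLuminance = 50;    /* 0.0050 cd/m^2 (0.0001 units) */
    m->maxCLL  = 1000;                       /* 1000 cd/m^2                  */
    m->maxFALL = 400;                        /*  400 cd/m^2                  */
}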
|
||||
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
||||
@@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
|
||||
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
|
||||
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
|
||||
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
|
||||
};
|
||||
|
||||
typedef struct NvKmsSurfaceMemoryFormatInfo {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
} caps;
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@@ -218,6 +219,11 @@ struct NvKmsKapiLayerConfig {
|
||||
struct NvKmsRRParams rrParams;
|
||||
struct NvKmsKapiSyncpt syncptParams;
|
||||
|
||||
struct NvKmsHDRStaticMetadata hdrMetadata;
|
||||
NvBool hdrMetadataSpecified;
|
||||
|
||||
enum NvKmsOutputTf tf;
|
||||
|
||||
NvU8 minPresentInterval;
|
||||
NvBool tearing;
|
||||
|
||||
@@ -226,6 +232,8 @@ struct NvKmsKapiLayerConfig {
|
||||
|
||||
NvS16 dstX, dstY;
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
@@ -277,6 +285,8 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
struct NvKmsKapiDisplayMode mode;
|
||||
|
||||
NvBool vrrEnabled;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
@@ -368,6 +378,9 @@ struct NvKmsKapiDynamicDisplayParams {
|
||||
/* [OUT] Connection status */
|
||||
NvU32 connected;
|
||||
|
||||
/* [OUT] VRR status */
|
||||
NvBool vrrSupported;
|
||||
|
||||
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
|
||||
struct {
|
||||
NvU16 bufferSize;
|
||||
@@ -484,6 +497,47 @@ struct NvKmsKapiFunctionsTable {
|
||||
*/
|
||||
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] fd fd from opening /dev/nvidia-modeset.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to grant.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*grantPermissions)
|
||||
(
|
||||
NvS32 fd,
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Revoke permissions previously granted. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to revoke.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*revokePermissions)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Registers for notification, via
|
||||
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified
|
||||
|
||||
@@ -234,12 +234,14 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
|
||||
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
|
||||
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
|
||||
#else
|
||||
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
|
||||
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
|
||||
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
|
||||
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
|
||||
#endif
|
||||
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
@@ -249,12 +251,12 @@ extern "C" {
|
||||
#define DRF_EXTENT(drf) (1?drf) // much better
|
||||
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
|
||||
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
|
||||
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
|
||||
#endif
|
||||
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
|
||||
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
|
||||
|
||||
#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
|
||||
#endif
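For readers unfamiliar with the DRF helpers being touched here: a field is named by its high:low bit positions, and DRF_SHIFT/DRF_MASK derive the shift and mask from that definition. An illustrative use with a made-up register layout (NV_PFOO_* is not part of this diff):

/* Hypothetical field occupying bits 3..1 of a 32-bit register. */
#define NV_PFOO_CTRL_MODE        3:1

/* DRF_SHIFT -> 1 and DRF_MASK -> 0x7 for a 3:1 field, so: */
#define EXAMPLE_GET_MODE(reg)    DRF_VAL(_PFOO, _CTRL, _MODE, (reg))
#define EXAMPLE_SET_MODE(val)    DRF_NUM(_PFOO, _CTRL, _MODE, (val))

/* EXAMPLE_GET_MODE(0x0000000E) == 7 and EXAMPLE_SET_MODE(5) == 0x0000000A */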
|
||||
@@ -907,6 +909,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
return uAddr.p;
|
||||
}
|
||||
|
||||
// Get bit at pos (k) from x
|
||||
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
|
||||
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
|
||||
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
|
||||
//
|
||||
// Set the bit at pos (b) for U64 which is < 128. Since the (b) can be >= 64, we need 2 U64 to store this.
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
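As the comments above explain, `lo` and `hi` together act as one 128-bit bitmask: bits 0..63 live in `lo`, bits 64..127 in `hi`. A small usage sketch (relies on NVBIT64 and nvAssert from the surrounding headers; the function is hypothetical):

static void example_bit_128(void)
{
    NvU64 lo = 0, hi = 0;

    NV_BIT_SET_128(3,  lo, hi);   /* sets bit 3  -> lo == 0x8  */
    NV_BIT_SET_128(70, lo, hi);   /* sets bit 70 -> hi == 0x40 */

    /* Both reads come back as 1 */
    (void)NV_BIT_GET_128(3,  lo, hi);
    (void)NV_BIT_GET_128(70, lo, hi);
}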
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUS_H
|
||||
#define SDK_NVSTATUS_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUS_H */
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUSCODES_H
|
||||
#define SDK_NVSTATUSCODES_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
|
||||
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
|
||||
|
||||
@@ -153,6 +148,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
@@ -164,6 +160,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
|
||||
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
|
||||
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUSCODES_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -143,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_release_semaphore (void *);
|
||||
void* NV_API_CALL os_alloc_rwlock (void);
|
||||
void NV_API_CALL os_free_rwlock (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
|
||||
void NV_API_CALL os_release_rwlock_read (void *);
|
||||
void NV_API_CALL os_release_rwlock_write (void *);
|
||||
NvBool NV_API_CALL os_semaphore_may_sleep (void);
|
||||
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
|
||||
NvBool NV_API_CALL os_is_isr (void);
|
||||
@@ -173,7 +181,6 @@ NV_STATUS NV_API_CALL os_put_page (NvU64 address);
|
||||
NvU32 NV_API_CALL os_get_page_refcount (NvU64 address);
|
||||
NvU32 NV_API_CALL os_count_tail_pages (NvU64 address);
|
||||
void NV_API_CALL os_free_pages_phys (NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_call_nv_vmbus (NvU32, void *);
|
||||
NV_STATUS NV_API_CALL os_open_temporary_file (void **);
|
||||
void NV_API_CALL os_close_file (void *);
|
||||
NV_STATUS NV_API_CALL os_write_file (void *, NvU8 *, NvU64, NvU64);
|
||||
|
||||
@@ -74,6 +74,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
|
||||
File diff suppressed because it is too large
@@ -27,6 +27,7 @@
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-utils.h"
|
||||
#include "nvidia-drm-encoder.h"
|
||||
|
||||
@@ -42,6 +43,7 @@
|
||||
|
||||
#include <drm/drm_atomic.h>
|
||||
#include <drm/drm_atomic_helper.h>
|
||||
#include <drm/drm_edid.h>
|
||||
|
||||
static void nv_drm_connector_destroy(struct drm_connector *connector)
|
||||
{
|
||||
@@ -98,7 +100,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
break;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_OVERRIDE_EDID)
|
||||
if (connector->override_edid) {
|
||||
#else
|
||||
if (drm_edid_override_connector_update(connector) > 0) {
|
||||
#endif
|
||||
const struct drm_property_blob *edid = connector->edid_blob_ptr;
|
||||
|
||||
if (edid->length <= sizeof(pDetectParams->edid.buffer)) {
|
||||
@@ -118,6 +124,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
|
||||
drm_connector_attach_vrr_capable_property(&nv_connector->base);
|
||||
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
|
||||
#endif
|
||||
|
||||
if (pDetectParams->connected) {
|
||||
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {
|
||||
|
||||
@@ -197,6 +208,11 @@ done:
|
||||
|
||||
nv_drm_free(pDetectParams);
|
||||
|
||||
if (status == connector_status_disconnected &&
|
||||
nv_connector->modeset_permission_filep) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -362,6 +378,8 @@ nv_drm_connector_new(struct drm_device *dev,
|
||||
nv_connector->physicalIndex = physicalIndex;
|
||||
nv_connector->type = type;
|
||||
nv_connector->internal = internal;
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
|
||||
strcpy(nv_connector->dpAddress, dpAddress);
|
||||
|
||||
@@ -464,4 +482,26 @@ done:
|
||||
return connector;
|
||||
}
|
||||
|
||||
/*
|
||||
* Revoke the permissions on this connector.
|
||||
*/
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector* nv_connector)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
bool ret = true;
|
||||
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -51,6 +51,20 @@ struct nv_drm_connector {
|
||||
|
||||
atomic_t connection_status_dirty;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
/**
|
||||
* @modeset_permission_crtc:
|
||||
*
|
||||
* The crtc using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct nv_drm_crtc *modeset_permission_crtc;
|
||||
|
||||
struct drm_connector base;
|
||||
};
|
||||
|
||||
@@ -84,6 +98,9 @@ nv_drm_get_connector(struct drm_device *dev,
|
||||
NvBool internal,
|
||||
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH]);
|
||||
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector *nv_connector);
|
||||
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#endif /* __NVIDIA_DRM_CONNECTOR_H__ */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -46,6 +46,35 @@
|
||||
#include <linux/nvhost.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
static int
|
||||
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
|
||||
struct drm_property_blob **blob,
|
||||
uint64_t blob_id,
|
||||
ssize_t expected_size)
|
||||
{
|
||||
struct drm_property_blob *new_blob = NULL;
|
||||
|
||||
if (blob_id != 0) {
|
||||
new_blob = drm_property_lookup_blob(dev, blob_id);
|
||||
if (new_blob == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((expected_size > 0) &&
|
||||
(new_blob->length != expected_size)) {
|
||||
drm_property_blob_put(new_blob);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
drm_property_replace_blob(blob, new_blob);
|
||||
drm_property_blob_put(new_blob);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nv_drm_plane_destroy(struct drm_plane *plane)
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
@@ -84,9 +113,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
cursor_req_config_disable(req_config);
|
||||
@@ -186,7 +212,6 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
int ret = 0;
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
plane_req_config_disable(req_config);
|
||||
@@ -309,6 +334,9 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
nv_plane->defaultCompositionMode;
|
||||
#endif
|
||||
|
||||
req_config->config.inputColorSpace =
|
||||
nv_drm_plane_state->input_colorspace;
|
||||
|
||||
req_config->config.syncptParams.preSyncptSpecified = false;
|
||||
req_config->config.syncptParams.postSyncptRequested = false;
|
||||
|
||||
@@ -320,10 +348,10 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
int ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -339,6 +367,60 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
|
||||
struct hdr_output_metadata *hdr_metadata =
|
||||
nv_drm_plane_state->hdr_output_metadata->data;
|
||||
struct hdr_metadata_infoframe *info_frame =
|
||||
&hdr_metadata->hdmi_metadata_type1;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
uint32_t i;
|
||||
|
||||
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].x =
|
||||
info_frame->display_primaries[i].x;
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].y =
|
||||
info_frame->display_primaries[i].y;
|
||||
}
|
||||
|
||||
req_config->config.hdrMetadata.whitePoint.x =
|
||||
info_frame->white_point.x;
|
||||
req_config->config.hdrMetadata.whitePoint.y =
|
||||
info_frame->white_point.y;
|
||||
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
|
||||
info_frame->max_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
|
||||
info_frame->min_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.maxCLL =
|
||||
info_frame->max_cll;
|
||||
req_config->config.hdrMetadata.maxFALL =
|
||||
info_frame->max_fall;
|
||||
|
||||
req_config->config.hdrMetadataSpecified = true;
|
||||
|
||||
switch (info_frame->eotf) {
|
||||
case HDMI_EOTF_SMPTE_ST2084:
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
|
||||
break;
|
||||
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
|
||||
req_config->config.tf =
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
|
||||
break;
|
||||
default:
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
req_config->config.hdrMetadataSpecified = false;
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Unconditionally mark the surface as changed, even if nothing changed,
|
||||
* so that we always get a flip event: a DRM client may flip with
|
||||
@@ -509,9 +591,21 @@ static int nv_drm_plane_atomic_set_property(
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
return nv_drm_atomic_replace_property_blob_from_id(
|
||||
nv_dev->dev,
|
||||
&nv_drm_plane_state->hdr_output_metadata,
|
||||
val,
|
||||
sizeof(struct hdr_output_metadata));
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int nv_drm_plane_atomic_get_property(
|
||||
@@ -521,12 +615,26 @@ static int nv_drm_plane_atomic_get_property(
|
||||
uint64_t *val)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
*val = nv_drm_plane_state->input_colorspace;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
*val = nv_drm_plane_state->hdr_output_metadata ?
|
||||
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct drm_plane_state *
|
||||
@@ -544,6 +652,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
|
||||
|
||||
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
|
||||
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
|
||||
if (nv_plane_state->hdr_output_metadata) {
|
||||
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
|
||||
}
|
||||
#endif
|
||||
|
||||
return &nv_plane_state->base;
|
||||
}
|
||||
@@ -557,6 +673,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
#else
|
||||
__drm_atomic_helper_plane_destroy_state(state);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(state);
|
||||
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void nv_drm_plane_atomic_destroy_state(
|
||||
@@ -803,7 +925,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
|
||||
};
|
||||
|
||||
static void nv_drm_plane_install_properties(
|
||||
struct drm_plane *plane)
|
||||
struct drm_plane *plane,
|
||||
NvBool supportsHDR)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
|
||||
@@ -811,6 +934,19 @@ static void nv_drm_plane_install_properties(
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_out_fence_property, 0);
|
||||
}
|
||||
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -990,7 +1126,9 @@ nv_drm_plane_create(struct drm_device *dev,
|
||||
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
|
||||
|
||||
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
|
||||
nv_drm_plane_install_properties(plane);
|
||||
nv_drm_plane_install_properties(
|
||||
plane,
|
||||
pResInfo->supportsHDR[layer_idx]);
|
||||
}
|
||||
|
||||
__nv_drm_plane_create_alpha_blending_properties(
|
||||
@@ -1043,6 +1181,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
|
||||
nv_crtc->head = head;
|
||||
INIT_LIST_HEAD(&nv_crtc->flip_list);
|
||||
spin_lock_init(&nv_crtc->flip_list_lock);
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
|
||||
ret = drm_crtc_init_with_planes(nv_dev->dev,
|
||||
&nv_crtc->base,
|
||||
@@ -1141,11 +1280,13 @@ void nv_drm_enumerate_crtcs_and_planes(
|
||||
}
|
||||
|
||||
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
|
||||
struct drm_plane *overlay_plane = NULL;
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
|
||||
continue;
|
||||
}
|
||||
|
||||
struct drm_plane *overlay_plane =
|
||||
overlay_plane =
|
||||
nv_drm_plane_create(nv_dev->dev,
|
||||
DRM_PLANE_TYPE_OVERLAY,
|
||||
layer,
|
||||
@@ -1189,7 +1330,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -1217,7 +1358,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -35,38 +35,9 @@
|
||||
|
||||
#include <drm/drm_crtc.h>
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvkms-kapi.h"
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_MODE_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
struct nv_drm_crtc {
|
||||
NvU32 head;
|
||||
|
||||
@@ -85,6 +56,13 @@ struct nv_drm_crtc {
|
||||
*/
|
||||
spinlock_t flip_list_lock;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this crtc with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
struct drm_crtc base;
|
||||
};
|
||||
|
||||
@@ -205,6 +183,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
enum NvKmsInputColorSpace input_colorspace;
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
@@ -212,6 +194,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
|
||||
return container_of(state, struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
|
||||
{
|
||||
return container_of(state, const struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
|
||||
{
|
||||
if (crtc == NULL) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -30,7 +30,7 @@
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@@ -86,6 +86,23 @@
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
    enum NvKmsInputColorSpace colorSpace)
{
    switch (colorSpace) {
        case NVKMS_INPUT_COLORSPACE_NONE:
            return "None";
        case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
            return "IEC 61966-2-2 linear FP";
        case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
            return "ITU-R BT.2100-PQ YCbCr";
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return "None";
    }
};
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
@@ -240,10 +257,6 @@ nv_drm_init_mode_config(struct nv_drm_device *nv_dev,
|
||||
dev->mode_config.preferred_depth = 24;
|
||||
dev->mode_config.prefer_shadow = 1;
|
||||
|
||||
/* Currently unused. Update when needed. */
|
||||
|
||||
dev->mode_config.fb_base = 0;
|
||||
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_ASYNC_FLIP) || \
|
||||
defined(NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS)
|
||||
dev->mode_config.async_page_flip = true;
|
||||
@@ -332,6 +345,15 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
@@ -345,6 +367,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_dev->nv_hdr_output_metadata_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_HDR_STATIC_METADATA", 0);
|
||||
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -667,6 +706,16 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
/* check the pDevice since this only gets set if modeset = 1
|
||||
* which is a requirement for the dma_buf extension to work
|
||||
*/
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
return nv_dev->pDevice ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static
|
||||
int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
@@ -696,6 +745,455 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
|
||||
NvU32 dpyId)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
return nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->hDisplay == dpyId;
|
||||
}
|
||||
|
||||
static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
|
||||
// Importantly, drm_connector_lookup (with filep) will only return the
|
||||
// connector if we are master, a lessee with the connector, or not master at
|
||||
// all. It will return NULL if we are a lessee with other connectors.
|
||||
struct drm_connector *connector =
|
||||
nv_drm_connector_lookup(dev, filep, params->connectorId);
|
||||
struct nv_drm_connector *nv_connector;
|
||||
int ret = 0;
|
||||
|
||||
if (!connector) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nv_connector = to_nv_connector(connector);
|
||||
if (!nv_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nv_connector->nv_detected_encoder) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
|
||||
|
||||
done:
|
||||
nv_drm_connector_put(connector);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
|
||||
struct drm_connector *connector;
|
||||
int ret = -EINVAL;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
|
||||
/* Lookup for existing connector with same dpyId */
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
params->connectorId = connector->base.id;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static NvU32 nv_drm_get_head_bit_from_connector(struct drm_connector *connector)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
|
||||
if (connector->state && connector->state->crtc) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(connector->state->crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
} else if (nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->base.crtc) {
|
||||
struct nv_drm_crtc *nv_crtc =
|
||||
to_nv_crtc(nv_connector->nv_detected_encoder->base.crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_grant_permissions_params *params = data;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct nv_drm_connector *target_nv_connector = NULL;
|
||||
struct nv_drm_crtc *target_nv_crtc = NULL;
|
||||
struct drm_connector *connector, *target_connector = NULL;
|
||||
struct drm_crtc *crtc;
|
||||
NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/* Get the connector for the dpyId. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
target_connector =
|
||||
nv_drm_connector_lookup(dev, filep, connector->base.id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
// Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
|
||||
// return the object if we are master, a lessee with the object, or not
|
||||
// master at all. It will return NULL if we are a lessee with other objects.
|
||||
if (!target_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
target_nv_connector = to_nv_connector(target_connector);
|
||||
possible_crtcs =
|
||||
target_nv_connector->nv_detected_encoder->base.possible_crtcs;
|
||||
|
||||
/* Target connector must not be previously granted. */
|
||||
if (target_nv_connector->modeset_permission_filep) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Add all heads that are owned and not already granted. */
|
||||
freeHeadBits = 0;
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
|
||||
!nv_crtc->modeset_permission_filep &&
|
||||
(drm_crtc_mask(crtc) & possible_crtcs)) {
|
||||
freeHeadBits |= NVBIT(nv_crtc->head);
|
||||
}
|
||||
}
|
||||
|
||||
targetHeadBit = nv_drm_get_head_bit_from_connector(target_connector);
|
||||
if (targetHeadBit & freeHeadBits) {
|
||||
/* If a crtc is already being used by this connector, use it. */
|
||||
freeHeadBits = targetHeadBit;
|
||||
} else {
|
||||
/* Otherwise, remove heads that are in use by other connectors. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Fail if no heads are available. */
|
||||
if (!freeHeadBits) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop through the crtc again and find a matching head.
|
||||
* Record the filep that is using the crtc and the connector.
|
||||
*/
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (freeHeadBits & NVBIT(nv_crtc->head)) {
|
||||
target_nv_crtc = nv_crtc;
|
||||
head = nv_crtc->head;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nvKms->grantPermissions(params->fd, nv_dev->pDevice, head,
|
||||
params->dpyId)) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
target_nv_connector->modeset_permission_crtc = target_nv_crtc;
|
||||
target_nv_connector->modeset_permission_filep = filep;
|
||||
target_nv_crtc->modeset_permission_filep = filep;
|
||||
|
||||
done:
|
||||
if (target_connector) {
|
||||
nv_drm_connector_put(target_connector);
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool nv_drm_revoke_connector(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_connector *nv_connector)
|
||||
{
|
||||
bool ret = true;
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission(struct drm_device *dev,
|
||||
struct drm_file *filep, NvU32 dpyId)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If dpyId is set, only revoke those specific resources. Otherwise,
|
||||
* the call came from closing the file, so revoke all resources for that filep.
|
||||
*/
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep == filep &&
|
||||
(!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
|
||||
if (!nv_drm_connector_revoke_permissions(dev, nv_connector)) {
|
||||
ret = -EINVAL;
|
||||
// Continue trying to revoke as much as possible.
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep == filep && !dpyId) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_revoke_permissions_params *params = data;
|
||||
if (!params->dpyId) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return nv_drm_revoke_permission(dev, filep, params->dpyId);
|
||||
}
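The revoke side is a single ioctl; a matching userspace sketch, illustrative only:

/* Hypothetical sketch: revoke a previously granted dpy. A dpyId of 0 is
 * rejected by nv_drm_revoke_permission_ioctl(), so pass the dpy that was
 * actually granted. */
static int revoke_connector(int drm_fd, uint32_t dpy_id)
{
    struct drm_nvidia_revoke_permissions_params params = { .dpyId = dpy_id };

    return drmIoctl(drm_fd, DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS, &params);
}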
|
||||
|
||||
static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
|
||||
{
|
||||
/*
|
||||
* Some systems, like Android, can reach here without initializing the
|
||||
* device, so check for that.
|
||||
*/
|
||||
if (dev->mode_config.num_crtc > 0 &&
|
||||
dev->mode_config.crtc_list.next != NULL &&
|
||||
dev->mode_config.crtc_list.prev != NULL &&
|
||||
dev->mode_config.num_connector > 0 &&
|
||||
dev->mode_config.connector_list.next != NULL &&
|
||||
dev->mode_config.connector_list.prev != NULL) {
|
||||
nv_drm_revoke_permission(dev, filep, 0);
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
|
||||
int lessee_id)
|
||||
{
|
||||
int object;
|
||||
void *entry;
|
||||
|
||||
while (master->lessor != NULL) {
|
||||
master = master->lessor;
|
||||
}
|
||||
|
||||
idr_for_each_entry(&master->lessee_idr, entry, object)
|
||||
{
|
||||
if (object == lessee_id) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void nv_drm_get_revoked_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, unsigned int cmd,
|
||||
unsigned long arg, int **objects,
|
||||
int *objects_count)
|
||||
{
|
||||
unsigned int ioc_size;
|
||||
struct drm_mode_revoke_lease revoke_lease;
|
||||
struct drm_master *lessor, *lessee;
|
||||
void *entry;
|
||||
int *objs;
|
||||
int obj, obj_count, obj_i;
|
||||
|
||||
ioc_size = _IOC_SIZE(cmd);
|
||||
if (ioc_size > sizeof(revoke_lease)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (copy_from_user(&revoke_lease, (void __user *)arg, ioc_size) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
lessor = nv_drm_file_get_master(filep);
|
||||
if (lessor == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->mode_config.idr_mutex);
|
||||
lessee = nv_drm_find_lessee(lessor, revoke_lease.lessee_id);
|
||||
|
||||
if (lessee == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_count = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
++obj_count;
|
||||
}
|
||||
if (obj_count == 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
objs = nv_drm_calloc(obj_count, sizeof(int));
|
||||
if (objs == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_i = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
objs[obj_i++] = obj;
|
||||
}
|
||||
*objects = objs;
|
||||
*objects_count = obj_count;
|
||||
|
||||
done:
|
||||
mutex_unlock(&dev->mode_config.idr_mutex);
|
||||
drm_master_put(&lessor);
|
||||
}
|
||||
|
||||
static bool nv_drm_is_in_objects(int object, int *objects, int objects_count)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < objects_count; ++i) {
|
||||
if (objects[i] == object) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void nv_drm_finish_revoking_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, int *objects,
|
||||
int objects_count)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
int ret = 0;
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(crtc->base.id, objects, objects_count)) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
}
|
||||
#endif /* NV_DRM_MASTER_HAS_LEASES */
|
||||
|
||||
#if defined(NV_DRM_BUS_PRESENT)
|
||||
|
||||
#if defined(NV_DRM_BUS_HAS_GET_IRQ)
|
||||
@@ -727,12 +1225,50 @@ static struct drm_bus nv_drm_bus = {
|
||||
|
||||
#endif /* NV_DRM_BUS_PRESENT */
|
||||
|
||||
/*
|
||||
* Wrapper around drm_ioctl to hook into the upstream ioctl path.
|
||||
*
|
||||
* Currently used to add additional handling to REVOKE_LEASE.
|
||||
*/
|
||||
static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
long retcode;
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
struct drm_file *file_priv = filp->private_data;
|
||||
struct drm_device *dev = file_priv->minor->dev;
|
||||
int *objects = NULL;
|
||||
int objects_count = 0;
|
||||
|
||||
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE) {
|
||||
// Save the revoked objects before revoking.
|
||||
nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
|
||||
&objects_count);
|
||||
}
|
||||
#endif
|
||||
|
||||
retcode = drm_ioctl(filp, cmd, arg);
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
|
||||
if (retcode == 0) {
|
||||
// If revoking was successful, finish revoking the objects.
|
||||
nv_drm_finish_revoking_objects(dev, file_priv, objects,
|
||||
objects_count);
|
||||
}
|
||||
nv_drm_free(objects);
|
||||
}
|
||||
#endif
|
||||
|
||||
return retcode;
|
||||
}
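The only command given extra treatment is DRM_IOCTL_MODE_REVOKE_LEASE, which is ordinary upstream uAPI. A lessor revoking one of its leases might look roughly like this sketch; the includes and error handling are assumptions for illustration:

/* Sketch: revoke a lease by lessee id. Once drm_ioctl() succeeds, the
 * wrapper above also clears modeset_permission_filep on every connector
 * and CRTC that belonged to the revoked lease. */
#include <stdint.h>
#include <xf86drm.h>
#include <drm/drm_mode.h>

static int revoke_lease(int lessor_fd, uint32_t lessee_id)
{
    struct drm_mode_revoke_lease revoke = { .lessee_id = lessee_id };

    return drmIoctl(lessor_fd, DRM_IOCTL_MODE_REVOKE_LEASE, &revoke);
}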
|
||||
|
||||
static const struct file_operations nv_drm_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
|
||||
.open = drm_open,
|
||||
.release = drm_release,
|
||||
.unlocked_ioctl = drm_ioctl,
|
||||
.unlocked_ioctl = nv_drm_ioctl,
|
||||
#if defined(CONFIG_COMPAT)
|
||||
.compat_ioctl = drm_compat_ioctl,
|
||||
#endif
|
||||
@@ -768,11 +1304,11 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
|
||||
nv_drm_fence_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_fence_context_create_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_PRIME_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_prime_fence_context_create_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_FENCE_ATTACH,
|
||||
nv_drm_gem_fence_attach_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_PRIME_FENCE_ATTACH,
|
||||
nv_drm_gem_prime_fence_attach_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
#endif
|
||||
|
||||
@@ -798,6 +1334,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IDENTIFY_OBJECT,
|
||||
nv_drm_gem_identify_object_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_DMABUF_SUPPORTED,
|
||||
nv_drm_dmabuf_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID,
|
||||
nv_drm_get_dpy_id_for_connector_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID,
|
||||
nv_drm_get_connector_id_for_dpy_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GRANT_PERMISSIONS,
|
||||
nv_drm_grant_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
|
||||
nv_drm_revoke_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
};
|
||||
|
||||
@@ -840,6 +1391,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
|
||||
.load = nv_drm_load,
|
||||
.unload = nv_drm_unload,
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
.postclose = nv_drm_postclose,
|
||||
#endif
|
||||
|
||||
.fops = &nv_drm_fops,
|
||||
|
||||
|
||||
@@ -31,17 +31,28 @@
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-dma-resv-helper.h"
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
|
||||
#include "nvidia-dma-fence-helper.h"
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
struct nv_drm_device *nv_dev;
|
||||
struct nv_drm_fence_context;
|
||||
|
||||
struct nv_drm_fence_context_ops {
|
||||
void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
|
||||
};
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
const struct nv_drm_fence_context_ops *ops;
|
||||
|
||||
struct nv_drm_device *nv_dev;
|
||||
uint32_t context;
|
||||
};
|
||||
|
||||
struct nv_drm_prime_fence_context {
|
||||
struct nv_drm_fence_context base;
|
||||
|
||||
NvU64 fenceSemIndex; /* Index into semaphore surface */
|
||||
|
||||
@@ -53,10 +64,10 @@ struct nv_drm_fence_context {
|
||||
spinlock_t lock;
|
||||
|
||||
/*
|
||||
* Software signaling structures. __nv_drm_fence_context_new()
|
||||
* allocates channel event and __nv_drm_fence_context_destroy() frees it.
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
* Software signaling structures. __nv_drm_prime_fence_context_new()
|
||||
* allocates channel event and __nv_drm_prime_fence_context_destroy() frees
|
||||
* it. There is no simultaneous read/write access to 'cb', therefore it
* does not require spin-lock protection.
|
||||
*/
|
||||
struct NvKmsKapiChannelEvent *cb;
|
||||
|
||||
@@ -79,7 +90,7 @@ struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
|
||||
}
|
||||
|
||||
static const char*
|
||||
nv_drm_gem_prime_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
{
|
||||
return "NVIDIA";
|
||||
}
|
||||
@@ -122,7 +133,7 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
|
||||
}
|
||||
|
||||
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
|
||||
.get_driver_name = nv_drm_gem_prime_fence_op_get_driver_name,
|
||||
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
|
||||
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
|
||||
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
|
||||
.release = nv_drm_gem_prime_fence_op_release,
|
||||
@@ -138,7 +149,7 @@ __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
|
||||
}
|
||||
|
||||
static void nv_drm_gem_prime_force_fence_signal(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
struct nv_drm_prime_fence_context *nv_fence_context)
|
||||
{
|
||||
WARN_ON(!spin_is_locked(&nv_fence_context->lock));
|
||||
|
||||
@@ -158,7 +169,7 @@ static void nv_drm_gem_prime_fence_event
|
||||
NvU32 dataU32
|
||||
)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context = dataPtr;
|
||||
struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
@@ -187,11 +198,53 @@ static void nv_drm_gem_prime_fence_event
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_fence_context_create_params *p)
|
||||
static inline struct nv_drm_prime_fence_context*
|
||||
to_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
|
||||
return (struct nv_drm_prime_fence_context *)nv_fence_context;
|
||||
}
|
||||
|
||||
static void __nv_drm_prime_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context;
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
to_prime_fence_context(nv_fence_context);
|
||||
|
||||
/*
|
||||
* Free the channel event before destroying the fence context, otherwise the
* event callback continues to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_prime_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_prime_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
|
||||
.destroy = __nv_drm_prime_fence_context_destroy,
|
||||
};
|
||||
|
||||
static inline struct nv_drm_prime_fence_context *
|
||||
__nv_drm_prime_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_prime_fence_context_create_params *p)
|
||||
{
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context;
|
||||
struct NvKmsKapiMemory *pSemSurface;
|
||||
NvU32 *pLinearAddress;
|
||||
|
||||
@@ -225,9 +278,9 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* event for it.
|
||||
*/
|
||||
|
||||
if ((nv_fence_context = nv_drm_calloc(
|
||||
if ((nv_prime_fence_context = nv_drm_calloc(
|
||||
1,
|
||||
sizeof(*nv_fence_context))) == NULL) {
|
||||
sizeof(*nv_prime_fence_context))) == NULL) {
|
||||
goto failed_alloc_fence_context;
|
||||
}
|
||||
|
||||
@@ -236,17 +289,18 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*nv_fence_context = (struct nv_drm_fence_context) {
|
||||
.nv_dev = nv_dev,
|
||||
.context = nv_dma_fence_context_alloc(1),
|
||||
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
|
||||
.base.ops = &nv_drm_prime_fence_context_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.pSemSurface = pSemSurface,
|
||||
.pLinearAddress = pLinearAddress,
|
||||
.fenceSemIndex = p->index,
|
||||
};
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence_context->pending);
|
||||
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
|
||||
|
||||
spin_lock_init(&nv_fence_context->lock);
|
||||
spin_lock_init(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* Except 'cb', the fence context should be completely initialized
|
||||
@@ -256,22 +310,22 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* There is no simultaneous read/write access to 'cb', therefore it does
* not require spin-lock protection.
|
||||
*/
|
||||
nv_fence_context->cb =
|
||||
nv_prime_fence_context->cb =
|
||||
nvKms->allocateChannelEvent(nv_dev->pDevice,
|
||||
nv_drm_gem_prime_fence_event,
|
||||
nv_fence_context,
|
||||
nv_prime_fence_context,
|
||||
p->event_nvkms_params_ptr,
|
||||
p->event_nvkms_params_size);
|
||||
if (!nv_fence_context->cb) {
|
||||
if (!nv_prime_fence_context->cb) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev,
|
||||
"Failed to allocate fence signaling event");
|
||||
goto failed_to_allocate_channel_event;
|
||||
}
|
||||
|
||||
return nv_fence_context;
|
||||
return nv_prime_fence_context;
|
||||
|
||||
failed_to_allocate_channel_event:
|
||||
nv_drm_free(nv_fence_context);
|
||||
nv_drm_free(nv_prime_fence_context);
|
||||
|
||||
failed_alloc_fence_context:
|
||||
|
||||
@@ -287,38 +341,8 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __nv_drm_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context,
|
||||
unsigned int seqno)
|
||||
{
|
||||
struct nv_drm_prime_fence *nv_fence;
|
||||
@@ -329,14 +353,14 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* If seqno wrapped, force signal fences to make sure none of them
|
||||
* get stuck.
|
||||
*/
|
||||
if (seqno < nv_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
if (seqno < nv_prime_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence->list_entry);
|
||||
@@ -344,14 +368,17 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
spin_lock_init(&nv_fence->lock);
|
||||
|
||||
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
|
||||
&nv_fence->lock, nv_fence_context->context,
|
||||
&nv_fence->lock, nv_prime_fence_context->base.context,
|
||||
seqno);
|
||||
|
||||
list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
|
||||
/* The context maintains a reference to any pending fences. */
|
||||
nv_dma_fence_get(&nv_fence->base);
|
||||
|
||||
nv_fence_context->last_seqno = seqno;
|
||||
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
nv_prime_fence_context->last_seqno = seqno;
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
out:
|
||||
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
|
||||
@@ -385,12 +412,15 @@ static inline struct nv_drm_gem_fence_context *to_gem_fence_context(
|
||||
* because tear down sequence calls to flush all existing
|
||||
* worker thread.
|
||||
*/
|
||||
static void __nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
static void
|
||||
__nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context =
|
||||
to_gem_fence_context(nv_gem);
|
||||
struct nv_drm_fence_context *nv_fence_context =
|
||||
nv_gem_fence_context->nv_fence_context;
|
||||
|
||||
__nv_drm_fence_context_destroy(nv_gem_fence_context->nv_fence_context);
|
||||
nv_fence_context->ops->destroy(nv_fence_context);
|
||||
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
}
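The indirection through ops->destroy() is what lets the GEM wrapper stay agnostic of the concrete fence-context type. A minimal sketch of the pattern, with illustrative names that are not from the driver:

/* Sketch: a base context embeds an ops table; concrete contexts embed the
 * base as their first member, so generic code can destroy either through
 * the same pointer. */
struct base_ctx;

struct base_ctx_ops {
    void (*destroy)(struct base_ctx *ctx);
};

struct base_ctx {
    const struct base_ctx_ops *ops;
};

struct prime_ctx {
    struct base_ctx base;   /* must be first for the downcast to be valid */
    int prime_only_state;
};

static void prime_ctx_destroy(struct base_ctx *ctx)
{
    struct prime_ctx *p = (struct prime_ctx *)ctx;

    /* free prime-specific resources here, then p itself */
    (void)p;
}

static const struct base_ctx_ops prime_ctx_ops = {
    .destroy = prime_ctx_destroy,
};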
|
||||
@@ -400,7 +430,8 @@ const struct nv_drm_gem_object_funcs nv_gem_fence_context_ops = {
|
||||
};
|
||||
|
||||
static inline
|
||||
struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
struct nv_drm_gem_fence_context *
|
||||
__nv_drm_gem_object_fence_context_lookup(
|
||||
struct drm_device *dev,
|
||||
struct drm_file *filp,
|
||||
u32 handle)
|
||||
@@ -416,11 +447,13 @@ struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
return to_gem_fence_context(nv_gem);
|
||||
}
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
static int
|
||||
__nv_drm_gem_fence_context_create(struct drm_device *dev,
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
u32 *handle,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_fence_context_create_params *p = data;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context = NULL;
|
||||
|
||||
if ((nv_gem_fence_context = nv_drm_calloc(
|
||||
@@ -429,10 +462,7 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((nv_gem_fence_context->nv_fence_context =
|
||||
__nv_drm_fence_context_new(nv_dev, p)) == NULL) {
|
||||
goto fence_context_new_failed;
|
||||
}
|
||||
nv_gem_fence_context->nv_fence_context = nv_fence_context;
|
||||
|
||||
nv_drm_gem_object_init(nv_dev,
|
||||
&nv_gem_fence_context->base,
|
||||
@@ -442,21 +472,45 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
|
||||
return nv_drm_gem_handle_create_drop_reference(filep,
|
||||
&nv_gem_fence_context->base,
|
||||
&p->handle);
|
||||
|
||||
fence_context_new_failed:
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
handle);
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_prime_fence_context_create_params *p = data;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
__nv_drm_prime_fence_context_new(nv_dev, p);
|
||||
int err;
|
||||
|
||||
if (!nv_prime_fence_context) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = __nv_drm_gem_fence_context_create(dev,
|
||||
&nv_prime_fence_context->base,
|
||||
&p->handle,
|
||||
filep);
|
||||
if (err) {
|
||||
__nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_gem_fence_attach_params *p = data;
|
||||
struct drm_nvidia_gem_prime_fence_attach_params *p = data;
|
||||
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context;
|
||||
@@ -487,9 +541,22 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
goto fence_context_lookup_failed;
|
||||
}
|
||||
|
||||
if (IS_ERR(fence = __nv_drm_fence_context_create_fence(
|
||||
nv_gem_fence_context->nv_fence_context,
|
||||
p->sem_thresh))) {
|
||||
if (nv_gem_fence_context->nv_fence_context->ops !=
|
||||
&nv_drm_prime_fence_context_ops) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Wrong fence context type: 0x%08x",
|
||||
p->fence_context_handle);
|
||||
|
||||
goto fence_context_create_fence_failed;
|
||||
}
|
||||
|
||||
fence = __nv_drm_prime_fence_context_create_fence(
|
||||
to_prime_fence_context(nv_gem_fence_context->nv_fence_context),
|
||||
p->sem_thresh);
|
||||
|
||||
if (IS_ERR(fence)) {
|
||||
ret = PTR_ERR(fence);
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
@@ -512,6 +579,9 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
|
||||
nv_dma_resv_unlock(&nv_gem->resv);
|
||||
|
||||
/* dma_resv_add_excl_fence takes its own reference to the fence. */
|
||||
nv_dma_fence_put(fence);
|
||||
|
||||
fence_context_create_fence_failed:
|
||||
nv_drm_gem_object_unreference_unlocked(&nv_gem_fence_context->base);
|
||||
|
||||
@@ -35,11 +35,11 @@ struct drm_device;
|
||||
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
#endif /* NV_DRM_FENCE_AVAILABLE */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
|
||||
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
|
||||
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
|
||||
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
|
||||
#if defined(DRM_FORMAT_ABGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
|
||||
#endif
|
||||
#if defined(DRM_FORMAT_XBGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
|
||||
#endif
|
||||
|
||||
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
|
||||
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,
|
||||
|
||||
@@ -95,7 +95,7 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
|
||||
pfn >>= PAGE_SHIFT;
|
||||
pfn += page_offset;
|
||||
} else {
|
||||
BUG_ON(page_offset > nv_nvkms_memory->pages_count);
|
||||
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
|
||||
}
|
||||
|
||||
@@ -201,7 +201,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
sg_table = nv_drm_prime_pages_to_sg(nv_dev->dev,
|
||||
|
||||
@@ -92,9 +92,9 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vma->vm_flags &= ~VM_PFNMAP;
|
||||
vma->vm_flags &= ~VM_IO;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
nv_vm_flags_clear(vma, VM_PFNMAP);
|
||||
nv_vm_flags_clear(vma, VM_IO);
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -112,8 +112,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset > nv_user_memory->pages_count);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@@ -299,7 +299,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
vma->vm_flags &= ~VM_MAYWRITE;
|
||||
nv_vm_flags_clear(vma, VM_MAYWRITE);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
*/
|
||||
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
|
||||
#include "nvmisc.h"
|
||||
|
||||
@@ -148,6 +150,18 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
goto free;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
nv_drm_for_each_plane(plane, dev) {
|
||||
plane_state = drm_atomic_get_plane_state(state, plane);
|
||||
if (IS_ERR(plane_state)) {
|
||||
ret = PTR_ERR(plane_state);
|
||||
goto free;
|
||||
}
|
||||
|
||||
plane_state->rotation = DRM_MODE_ROTATE_0;
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
|
||||
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
|
||||
if (ret < 0)
|
||||
|
||||
@@ -35,6 +35,35 @@
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
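With the fallbacks above in place, code can use the DRM_MODE_ROTATE_*/DRM_MODE_REFLECT_* names unconditionally. As a sketch, exposing a rotation property on a plane with the upstream helper might look like this; the particular mask is illustrative, not the set nvidia-drm advertises:

/* Sketch: create a rotation property using the unified macro names. */
static int example_create_rotation_property(struct drm_plane *plane)
{
    return drm_plane_create_rotation_property(plane,
                                              DRM_MODE_ROTATE_0,
                                              DRM_MODE_ROTATE_0 |
                                              DRM_MODE_ROTATE_180 |
                                              DRM_MODE_REFLECT_X |
                                              DRM_MODE_REFLECT_Y);
}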
|
||||
|
||||
/*
|
||||
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
|
||||
* (2017-09-26) and drm_dev_unref() is removed by
|
||||
@@ -277,11 +306,33 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
for_each_plane_in_state(__state, plane, plane_state, __i)
|
||||
#endif
|
||||
|
||||
static inline struct drm_crtc *nv_drm_crtc_find(struct drm_device *dev,
|
||||
uint32_t id)
|
||||
static inline struct drm_connector *
|
||||
nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
|
||||
uint32_t id)
|
||||
{
|
||||
#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
return drm_connector_find(dev, id);
|
||||
#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_connector_lookup(dev, filep, id);
|
||||
#else
|
||||
return drm_connector_lookup(dev, id);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_connector_put(struct drm_connector *connector)
|
||||
{
|
||||
#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
|
||||
drm_connector_put(connector);
|
||||
#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
drm_connector_unreference(connector);
|
||||
#endif
|
||||
}
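Every successful nv_drm_connector_lookup() must be balanced by nv_drm_connector_put(), as the grant ioctl above does on its done path. A small sketch of the pairing; the helper name and its use of nv_drm_connector_is_dpy_id() are illustrative:

/* Sketch: look up a connector for this filep, test it, and drop the
 * reference again regardless of the outcome. */
static bool example_connector_matches_dpy(struct drm_device *dev,
                                          struct drm_file *filep,
                                          uint32_t connector_id, NvU32 dpyId)
{
    struct drm_connector *connector =
        nv_drm_connector_lookup(dev, filep, connector_id);
    bool match = false;

    if (connector != NULL) {
        match = nv_drm_connector_is_dpy_id(connector, dpyId);
        nv_drm_connector_put(connector);
    }

    return match;
}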
|
||||
|
||||
static inline struct drm_crtc *
|
||||
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
|
||||
{
|
||||
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_crtc_find(dev, NULL /* file_priv */, id);
|
||||
return drm_crtc_find(dev, filep, id);
|
||||
#else
|
||||
return drm_crtc_find(dev, id);
|
||||
#endif
|
||||
@@ -297,6 +348,30 @@ static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
|
||||
#include <drm/drm_auth.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
|
||||
#include <drm/drm_file.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
|
||||
* pointers in drm_lease.c") in v5.15 (2021-07-20)
|
||||
*/
|
||||
static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
|
||||
{
|
||||
#if defined(NV_DRM_FILE_GET_MASTER_PRESENT)
|
||||
return drm_file_get_master(filep);
|
||||
#else
|
||||
if (filep->master) {
|
||||
return drm_master_get(filep->master);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
}
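The reference returned here must always be dropped with drm_master_put(); the lease bookkeeping above follows the usual get/use/put shape, sketched below with an illustrative helper name:

/* Sketch: take a master reference for the duration of an inspection and
 * release it on every path, as nv_drm_get_revoked_objects() does. */
static void example_inspect_master(struct drm_file *filep)
{
    struct drm_master *master = nv_drm_file_get_master(filep);

    if (master == NULL) {
        return;
    }

    /* ... walk master->lessor / master->lessee_idr here ... */

    drm_master_put(&master);
}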
|
||||
|
||||
/*
|
||||
* drm_connector_for_each_possible_encoder() is added by commit
|
||||
* 83aefbb887b59df0b3520965c3701e01deacfc52 which was Signed-off-by:
|
||||
|
||||
@@ -34,8 +34,8 @@
|
||||
#define DRM_NVIDIA_GEM_IMPORT_USERSPACE_MEMORY 0x02
|
||||
#define DRM_NVIDIA_GET_DEV_INFO 0x03
|
||||
#define DRM_NVIDIA_FENCE_SUPPORTED 0x04
|
||||
#define DRM_NVIDIA_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_GET_CLIENT_CAPABILITY 0x08
|
||||
#define DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY 0x09
|
||||
#define DRM_NVIDIA_GEM_MAP_OFFSET 0x0a
|
||||
@@ -43,6 +43,11 @@
|
||||
#define DRM_NVIDIA_GET_CRTC_CRC32_V2 0x0c
|
||||
#define DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY 0x0d
|
||||
#define DRM_NVIDIA_GEM_IDENTIFY_OBJECT 0x0e
|
||||
#define DRM_NVIDIA_DMABUF_SUPPORTED 0x0f
|
||||
#define DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID 0x10
|
||||
#define DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID 0x11
|
||||
#define DRM_NVIDIA_GRANT_PERMISSIONS 0x12
|
||||
#define DRM_NVIDIA_REVOKE_PERMISSIONS 0x13
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
|
||||
@@ -65,50 +70,69 @@
|
||||
#if defined(NV_LINUX)
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_DMABUF_SUPPORTED)
|
||||
#else
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED 0
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_CONTEXT_CREATE), \
|
||||
struct drm_nvidia_fence_context_create_params)
|
||||
#define DRM_IOCTL_NVIDIA_PRIME_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE),\
|
||||
struct drm_nvidia_prime_fence_context_create_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_fence_attach_params)
|
||||
#define DRM_IOCTL_NVIDIA_GEM_PRIME_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_prime_fence_attach_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
struct drm_nvidia_get_client_capability_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32), \
|
||||
struct drm_nvidia_get_crtc_crc32_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
|
||||
#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2 \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2), \
|
||||
struct drm_nvidia_get_crtc_crc32_v2_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY), \
|
||||
struct drm_nvidia_gem_export_nvkms_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET), \
|
||||
struct drm_nvidia_gem_map_offset_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
|
||||
struct drm_nvidia_gem_alloc_nvkms_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
|
||||
struct drm_nvidia_gem_export_dmabuf_memory_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
struct drm_nvidia_gem_identify_object_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID),\
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID),\
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GRANT_PERMISSIONS), \
|
||||
struct drm_nvidia_grant_permissions_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_REVOKE_PERMISSIONS), \
|
||||
struct drm_nvidia_revoke_permissions_params)
|
||||
|
||||
struct drm_nvidia_gem_import_nvkms_memory_params {
|
||||
uint64_t mem_size; /* IN */
|
||||
|
||||
@@ -136,7 +160,7 @@ struct drm_nvidia_get_dev_info_params {
|
||||
uint32_t sector_layout; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_fence_context_create_params {
|
||||
struct drm_nvidia_prime_fence_context_create_params {
|
||||
uint32_t handle; /* OUT GEM handle to fence context */
|
||||
|
||||
uint32_t index; /* IN Index of semaphore to use for fencing */
|
||||
@@ -151,7 +175,7 @@ struct drm_nvidia_fence_context_create_params {
|
||||
uint64_t event_nvkms_params_size; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_gem_fence_attach_params {
|
||||
struct drm_nvidia_gem_prime_fence_attach_params {
|
||||
uint32_t handle; /* IN GEM handle to attach fence to */
|
||||
uint32_t fence_context_handle; /* IN GEM handle to fence context on which fence is run on */
|
||||
uint32_t sem_thresh; /* IN Semaphore value to reach before signal */
|
||||
@@ -232,4 +256,23 @@ struct drm_nvidia_gem_identify_object_params {
|
||||
drm_nvidia_gem_object_type object_type; /* OUT GEM object type */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params {
|
||||
uint32_t connectorId; /* IN */
|
||||
uint32_t dpyId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
uint32_t connectorId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_grant_permissions_params {
|
||||
int32_t fd; /* IN */
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_revoke_permissions_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
|
||||
|
||||
@@ -47,6 +47,14 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
void *nv_drm_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
size_t total_size = nmemb * size;
|
||||
//
|
||||
// Check for overflow.
|
||||
//
|
||||
if ((nmemb != 0) && ((total_size / nmemb) != size))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return kzalloc(nmemb * size, GFP_KERNEL);
|
||||
}
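The division re-check is the usual portable way to catch multiplication overflow without compiler builtins: with nmemb = (SIZE_MAX / 2) + 1 and size = 2 the product wraps to 0, the quotient no longer equals size, and the allocation is refused. A standalone illustration of the same test, not driver code:

/* Illustration only: the overflow test used above, shown outside the kernel. */
#include <stdbool.h>
#include <stddef.h>

static bool mul_overflows(size_t nmemb, size_t size)
{
    size_t total = nmemb * size;

    return (nmemb != 0) && ((total / nmemb) != size);
}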
|
||||
|
||||
@@ -93,8 +101,6 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
const int write = 1;
|
||||
const int force = 0;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
@@ -105,7 +111,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_GET_USER_PAGES(address, pages_count, write, force,
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
@@ -123,7 +129,7 @@ failed:
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
put_page(user_pages[i]);
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,8 +144,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
|
||||
for (i = 0; i < pages_count; i++) {
|
||||
set_page_dirty_lock(pages[i]);
|
||||
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
|
||||
nv_drm_free(pages);
|
||||
@@ -174,12 +179,7 @@ static void __exit nv_linux_drm_exit(void)
|
||||
module_init(nv_linux_drm_init);
|
||||
module_exit(nv_linux_drm_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
|
||||
to_nv_crtc_state(new_crtc_state);
|
||||
struct drm_plane_state *old_plane_state = NULL;
|
||||
struct drm_plane *plane = NULL;
|
||||
struct drm_plane *primary_plane = crtc->primary;
|
||||
bool primary_event = false;
|
||||
bool overlay_event = false;
|
||||
int i;
|
||||
|
||||
if (!old_crtc_state->active && !new_crtc_state->active) {
|
||||
@@ -134,16 +131,19 @@ static int __nv_drm_put_back_post_fence_fd(
|
||||
const struct NvKmsKapiLayerReplyConfig *layer_reply_config)
|
||||
{
|
||||
int fd = layer_reply_config->postSyncptFd;
|
||||
int ret = 0;
|
||||
|
||||
if ((fd >= 0) && (plane_state->fd_user_ptr != NULL)) {
|
||||
if (put_user(fd, plane_state->fd_user_ptr)) {
|
||||
return -EFAULT;
|
||||
ret = copy_to_user(plane_state->fd_user_ptr, &fd, sizeof(fd));
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*! set back to Null and let set_property specify it again */
|
||||
plane_state->fd_user_ptr = NULL;
|
||||
}
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __nv_drm_get_syncpt_data(
|
||||
@@ -274,6 +274,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
|
||||
nv_new_crtc_state->nv_flip = NULL;
|
||||
}
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
|
||||
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -122,6 +122,11 @@ struct nv_drm_device {
|
||||
NvBool supportsSyncpts;
|
||||
|
||||
struct drm_property *nv_out_fence_property;
|
||||
struct drm_property *nv_input_colorspace_property;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
@@ -16,7 +16,7 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-prime-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
|
||||
@@ -59,11 +59,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
@@ -100,6 +103,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
@@ -115,6 +119,14 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
||||
@@ -34,6 +34,9 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <acpi/video.h>
|
||||
|
||||
#include "nvstatus.h"
|
||||
|
||||
@@ -72,8 +75,26 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
|
||||
static bool malloc_verbose = false;
|
||||
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
|
||||
|
||||
/* This parameter is used to find the dpy override conf file */
|
||||
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
|
||||
|
||||
MODULE_PARM_DESC(config_file,
|
||||
"Path to the nvidia-modeset configuration file "
|
||||
"(default: disabled)");
|
||||
static char *nvkms_conf = NULL;
|
||||
module_param_named(config_file, nvkms_conf, charp, 0400);
|
||||
|
||||
static atomic_t nvkms_alloc_called_count;
|
||||
|
||||
static bool force_api_to_hw_head_identity_mapping = false;
|
||||
module_param_named(force_api_to_hw_head_identity_mapping,
|
||||
force_api_to_hw_head_identity_mapping, bool, 0400);
|
||||
|
||||
NvBool nvkms_force_api_to_hw_head_identity_mappings(void)
|
||||
{
|
||||
return force_api_to_hw_head_identity_mapping;
|
||||
}
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void)
|
||||
{
|
||||
return output_rounding_fix;
|
||||
@@ -180,7 +201,10 @@ static inline int nvkms_read_trylock_pm_lock(void)
|
||||
|
||||
static inline void nvkms_read_lock_pm_lock(void)
|
||||
{
|
||||
down_read(&nvkms_pm_lock);
|
||||
while (!down_read_trylock(&nvkms_pm_lock)) {
|
||||
try_to_freeze();
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nvkms_read_unlock_pm_lock(void)
|
||||
@@ -956,6 +980,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
struct nvkms_backlight_device *nvkms_bd = NULL;
|
||||
int i;
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
|
||||
if (gpu_info == NULL) {
|
||||
return NULL;
|
||||
@@ -1078,7 +1108,7 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvkms_close_common(struct nvkms_per_open *popen)
|
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
{
    /*
     * Don't use down_interruptible(): we need to free resources

@@ -1116,13 +1146,13 @@ void nvkms_close_common(struct nvkms_per_open *popen)
    nvkms_free(popen, sizeof(*popen));
}

static void nvkms_close_deferred(void *data)
static void nvkms_close_pm_unlocked(void *data)
{
    struct nvkms_per_open *popen = data;

    nvkms_read_lock_pm_lock();

    nvkms_close_common(popen);
    nvkms_close_pm_locked(popen);

    nvkms_read_unlock_pm_lock();
}

@@ -1130,11 +1160,11 @@ static void nvkms_close_deferred(void *data)
static void nvkms_close_popen(struct nvkms_per_open *popen)
{
    if (nvkms_read_trylock_pm_lock() == 0) {
        nvkms_close_common(popen);
        nvkms_close_pm_locked(popen);
        nvkms_read_unlock_pm_lock();
    } else {
        nv_kthread_q_item_init(&popen->deferred_close_q_item,
                               nvkms_close_deferred,
                               nvkms_close_pm_unlocked,
                               popen);
        nvkms_queue_work(&nvkms_deferred_close_kthread_q,
                         &popen->deferred_close_q_item);

@@ -1187,7 +1217,7 @@ struct nvkms_per_open* nvkms_open_from_kapi

void nvkms_close_from_kapi(struct nvkms_per_open *popen)
{
    nvkms_close_popen(popen);
    nvkms_close_pm_unlocked(popen);
}

NvBool nvkms_ioctl_from_kapi

@@ -1346,30 +1376,119 @@ static void nvkms_proc_exit(void)
        return;
    }

#if defined(NV_PROC_REMOVE_PRESENT)
    proc_remove(nvkms_proc_dir);
#else
    /*
     * On kernel versions without proc_remove(), we need to explicitly
     * remove each proc file beneath nvkms_proc_dir.
     * nvkms_proc_init() only creates files directly under
     * nvkms_proc_dir, so those are the only files we need to remove
     * here: warn if there is any deeper directory nesting.
     */
    {
        struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
#endif /* CONFIG_PROC_FS */
}

        while (entry != NULL) {
            struct proc_dir_entry *next = entry->next;
            WARN_ON(entry->subdir != NULL);
            remove_proc_entry(entry->name, entry->parent);
            entry = next;
        }
/*************************************************************************
 * NVKMS Config File Read
 ************************************************************************/
static NvBool nvkms_fs_mounted(void)
{
    return current->fs != NULL;
}

static size_t nvkms_config_file_open
(
    char *fname,
    char ** const buff
)
{
    int i = 0;
    struct file *file;
    struct inode *file_inode;
    size_t file_size = 0;
    size_t read_size = 0;
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
    loff_t pos = 0;
#endif

    if (!nvkms_fs_mounted()) {
        printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
        return 0;
    }

        remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
#endif /* NV_PROC_REMOVE_PRESENT */
#endif /* CONFIG_PROC_FS */
    file = filp_open(fname, O_RDONLY, 0);
    if (file == NULL || IS_ERR(file)) {
        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to open %s\n",
               fname);
        return 0;
    }

    file_inode = file->f_inode;
    if (file_inode == NULL || IS_ERR(file_inode)) {
        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Inode is invalid\n");
        goto done;
    }
    file_size = file_inode->i_size;
    if (file_size > NVKMS_READ_FILE_MAX_SIZE) {
        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: File exceeds maximum size\n");
        goto done;
    }

    *buff = nvkms_alloc(file_size, NV_FALSE);
    if (*buff == NULL) {
        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
        goto done;
    }

    /*
     * TODO: Once we have access to GPL symbols, this can be replaced with
     * kernel_read_file for kernels >= 4.6
     */
    while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
        ssize_t ret = kernel_read(file, *buff + read_size,
                                  file_size - read_size, &pos);
#else
        ssize_t ret = kernel_read(file, read_size,
                                  *buff + read_size,
                                  file_size - read_size);
#endif
        if (ret <= 0) {
            break;
        }
        read_size += ret;
    }

    if (read_size != file_size) {
        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to read %s\n",
               fname);
        goto done;
    }

    filp_close(file, current->files);
    return file_size;

done:
    nvkms_free(*buff, file_size);
    filp_close(file, current->files);
    return 0;
}

/* must be called with nvkms_lock locked */
static void nvkms_read_config_file_locked(void)
{
    char *buffer = NULL;
    size_t buf_size = 0;

    /* only read the config file if the kernel parameter is set */
    if (!NVKMS_CONF_FILE_SPECIFIED) {
        return;
    }

    buf_size = nvkms_config_file_open(nvkms_conf, &buffer);

    if (buf_size == 0) {
        return;
    }

    if (nvKmsReadConf(buffer, buf_size, nvkms_config_file_open)) {
        printk(KERN_INFO NVKMS_LOG_PREFIX "Successfully read %s\n",
               nvkms_conf);
    }

    nvkms_free(buffer, buf_size);
}
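The read loop in nvkms_config_file_open has to cope with kernel_read() returning fewer bytes than requested, which is why it accumulates into read_size under a loop cap (NVKMS_READ_FILE_MAX_LOOPS). A minimal userspace analogue of the same accumulate-until-complete pattern, illustrative only and using read(2) in place of kernel_read():

    #include <stdio.h>
    #include <unistd.h>

    /* Returns the number of bytes actually read; the caller treats any value
     * other than file_size as a failure, mirroring the check above. */
    static size_t read_all(int fd, char *buf, size_t file_size)
    {
        size_t read_size = 0;
        int i = 0;

        while (read_size < file_size && i++ < 1000) {   /* cap stands in for NVKMS_READ_FILE_MAX_LOOPS */
            ssize_t ret = read(fd, buf + read_size, file_size - read_size);
            if (ret <= 0)
                break;                                  /* EOF or error: stop accumulating */
            read_size += (size_t)ret;
        }
        return read_size;
    }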
|
||||
|
||||
/*************************************************************************
|
||||
@@ -1543,10 +1662,12 @@ static int __init nvkms_init(void)
|
||||
if (!nvKmsModuleLoad()) {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
up(&nvkms_lock);
|
||||
if (ret != 0) {
|
||||
up(&nvkms_lock);
|
||||
goto fail_module_load;
|
||||
}
|
||||
nvkms_read_config_file_locked();
|
||||
up(&nvkms_lock);
|
||||
|
||||
nvkms_proc_init();
|
||||
|
||||
@@ -1630,12 +1751,7 @@ restart:
|
||||
module_init(nvkms_init);
|
||||
module_exit(nvkms_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
@@ -43,14 +43,9 @@ enum NvKmsSyncPtOp {
|
||||
NVKMS_SYNCPT_OP_ALLOC,
|
||||
NVKMS_SYNCPT_OP_GET,
|
||||
NVKMS_SYNCPT_OP_PUT,
|
||||
NVKMS_SYNCPT_OP_INCR_MAX,
|
||||
NVKMS_SYNCPT_OP_CPU_INCR,
|
||||
NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
|
||||
NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
|
||||
NVKMS_SYNCPT_OP_READ_MINVAL,
|
||||
NVKMS_SYNCPT_OP_READ_MAXVAL,
|
||||
NVKMS_SYNCPT_OP_SET_MIN_EQ_MAX,
|
||||
NVKMS_SYNCPT_OP_SET_MAXVAL,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -60,24 +55,10 @@ typedef struct {
|
||||
NvU32 id; /* out */
|
||||
} alloc;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} get;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} put;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 incr; /* in */
|
||||
NvU32 value; /* out */
|
||||
} incr_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} cpu_incr;
|
||||
|
||||
struct {
|
||||
NvS32 fd; /* in */
|
||||
NvU32 id; /* out */
|
||||
@@ -94,22 +75,9 @@ typedef struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 minval; /* out */
|
||||
} read_minval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 maxval; /* out */
|
||||
} read_maxval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} set_min_eq_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 val; /* in */
|
||||
} set_maxval;
|
||||
} NvKmsSyncPtOpParams;
|
||||
|
||||
NvBool nvkms_force_api_to_hw_head_identity_mappings(void);
|
||||
NvBool nvkms_output_rounding_fix(void);
|
||||
|
||||
void nvkms_call_rm (void *ops);
|
||||
|
||||
@@ -85,15 +85,11 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
|
||||
@@ -42,6 +42,20 @@ typedef void nvkms_procfs_proc_t(void *data,
                                 char *buffer, size_t size,
                                 nvkms_procfs_out_string_func_t *outString);

/* max number of loops to prevent hanging the kernel if an edge case is hit */
#define NVKMS_READ_FILE_MAX_LOOPS 1000
/* max size for any file read by the config system */
#define NVKMS_READ_FILE_MAX_SIZE 8192

/*
 * The read file callback should allocate a buffer pointed to by *buff, fill it
 * with the contents of fname, and return the size of the buffer. Buffer is not
 * guaranteed to be null-terminated. The caller is responsible for freeing the
 * buffer with nvkms_free, not nvFree.
 */
typedef size_t nvkms_config_read_file_func_t(char *fname,
                                             char ** const buff);
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
nvkms_procfs_proc_t *func;
|
||||
@@ -74,6 +88,9 @@ void nvKmsResume(NvU32 gpuId);
|
||||
|
||||
void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles);
|
||||
|
||||
NvBool nvKmsReadConf(const char *buff, size_t size,
|
||||
nvkms_config_read_file_func_t readfile);
|
||||
|
||||
void nvKmsKapiHandleEventQueueChange
|
||||
(
|
||||
struct NvKmsKapiDevice *device
|
||||
|
||||
@@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
|
||||
# MOFED's Module.symvers is needed for the build
|
||||
# to find the additional ib_* symbols.
|
||||
#
|
||||
# Also, MOFED doesn't use kbuild ARCH names.
|
||||
# So adapt OFA_ARCH to match MOFED's conventions.
|
||||
#
|
||||
ifeq ($(ARCH), arm64)
|
||||
OFA_ARCH := aarch64
|
||||
else ifeq ($(ARCH), powerpc)
|
||||
OFA_ARCH := ppc64le
|
||||
else
|
||||
OFA_ARCH := $(ARCH)
|
||||
endif
|
||||
OFA_DIR := /usr/src/ofa_kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
|
||||
if [ -d "$$d" ]; then \
|
||||
echo "$$d"; \
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -69,6 +69,9 @@ extern "C" {
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2021 NVIDIA Corporation
|
||||
Copyright (c) 2013-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
||||
@@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
|
||||
int result, node;
|
||||
nv_kthread_q_t local_q;
|
||||
|
||||
// If the API does not support CPU affinity, check whether the correct
|
||||
// error code is returned.
|
||||
// Non-affinitized queue allocation has been verified by previous test
|
||||
// so just ensure that the affinitized version also works.
|
||||
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
|
||||
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
|
||||
TEST_CHECK_RET(result == -ENOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for_each_online_node(node) {
|
||||
unsigned i;
|
||||
const unsigned max_i = 100;
|
||||
|
||||
@@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
@@ -58,6 +52,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
|
||||
@@ -72,6 +67,12 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
|
||||
@@ -94,7 +95,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
|
||||
|
||||
@@ -36,7 +36,7 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
@@ -67,17 +67,11 @@ endif
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
@@ -87,18 +81,19 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
@@ -107,6 +102,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -35,93 +35,95 @@
|
||||
#include "uvm_linux_ioctl.h"
|
||||
#include "uvm_hmm.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
||||
#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
|
||||
|
||||
static dev_t g_uvm_base_dev;
|
||||
static struct cdev g_uvm_cdev;
|
||||
static const struct file_operations uvm_fops;
|
||||
|
||||
// List of fault service contexts for CPU faults
|
||||
static LIST_HEAD(g_cpu_service_block_context_list);
|
||||
|
||||
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
|
||||
|
||||
NV_STATUS uvm_service_block_context_init(void)
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
unsigned num_preallocated_contexts = 4;
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
|
||||
{
|
||||
unsigned long uptr;
|
||||
uvm_fd_type_t type;
|
||||
void *ptr;
|
||||
|
||||
// Pre-allocate some fault service contexts for the CPU and add them to the global list
|
||||
while (num_preallocated_contexts-- > 0) {
|
||||
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
|
||||
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
|
||||
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
|
||||
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
|
||||
|
||||
switch (type) {
|
||||
case UVM_FD_UNINITIALIZED:
|
||||
case UVM_FD_INITIALIZING:
|
||||
UVM_ASSERT(!ptr);
|
||||
break;
|
||||
|
||||
case UVM_FD_VA_SPACE:
|
||||
UVM_ASSERT(ptr);
|
||||
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_service_block_context_exit(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context, *service_context_tmp;
|
||||
|
||||
// Free fault service contexts for the CPU and add clear the global list
|
||||
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
|
||||
cpu_fault.service_context_list) {
|
||||
uvm_kvfree(service_context);
|
||||
}
|
||||
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
// Get a fault service context from the global list or allocate a new one if there are no
|
||||
// available entries
|
||||
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context;
|
||||
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
|
||||
cpu_fault.service_context_list);
|
||||
|
||||
if (service_context)
|
||||
list_del(&service_context->cpu_fault.service_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
if (!service_context)
|
||||
service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
|
||||
return service_context;
|
||||
}
|
||||
|
||||
// Put a fault service context in the global list
|
||||
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
if (ptr_val)
|
||||
*ptr_val = ptr;
|
||||
|
||||
return type;
|
||||
}
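A small standalone sketch of the tagging scheme uvm_fd_type relies on, illustrative only and assuming two low tag bits, which is what the alignment BUILD_BUG_ON above implies:

    #include <stdint.h>
    #include <assert.h>

    #define TYPE_MASK 0x3UL   /* assumed: two tag bits in the low end of the pointer */

    int main(void)
    {
        unsigned long va_space = 0x7f00aa550000UL;   /* sufficiently aligned, low bits are 0 */
        unsigned long packed   = va_space | 0x1UL;   /* e.g. tag value 1 marks "VA space"    */

        assert((packed & TYPE_MASK)  == 0x1UL);      /* the type comes from the low bits     */
        assert((packed & ~TYPE_MASK) == va_space);   /* the pointer comes from the rest      */
        return 0;
    }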
|
||||
|
||||
// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
|
||||
// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
|
||||
// lock will need to be taken.
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
|
||||
if (status == NV_OK) {
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_create(inode, filp);
|
||||
mapping = uvm_kvmalloc(sizeof(*mapping));
|
||||
if (!mapping)
|
||||
return -ENOMEM;
|
||||
|
||||
uvm_up_read(&g_uvm_global.pm.lock);
|
||||
}
|
||||
// By default all struct files on the same inode share the same
|
||||
// address_space structure (the inode's) across all processes. This means
|
||||
// unmap_mapping_range would unmap virtual mappings across all processes on
|
||||
// that inode.
|
||||
//
|
||||
// Since the UVM driver uses the mapping offset as the VA of the file's
|
||||
// process, we need to isolate the mappings to each process.
|
||||
address_space_init_once(mapping);
|
||||
mapping->host = inode;
|
||||
|
||||
return -nv_status_to_errno(status);
|
||||
// Some paths in the kernel, for example force_page_cache_readahead which
|
||||
// can be invoked from user-space via madvise MADV_WILLNEED and fadvise
|
||||
// POSIX_FADV_WILLNEED, check the function pointers within
|
||||
// file->f_mapping->a_ops for validity. However, those paths assume that a_ops
|
||||
// itself is always valid. Handle that by using the inode's a_ops pointer,
|
||||
// which is what f_mapping->a_ops would point to anyway if we weren't re-
|
||||
// assigning f_mapping.
|
||||
mapping->a_ops = inode->i_mapping->a_ops;
|
||||
|
||||
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
|
||||
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
|
||||
#endif
|
||||
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = mapping;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int uvm_open_entry(struct inode *inode, struct file *filp)
|
||||
@@ -147,9 +149,18 @@ static void uvm_release_deferred(void *data)
|
||||
|
||||
static int uvm_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_fd_type_t fd_type;
|
||||
int ret;
|
||||
|
||||
fd_type = uvm_fd_type(filp, (void **)&va_space);
|
||||
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
|
||||
if (fd_type == UVM_FD_UNINITIALIZED) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
return 0;
|
||||
}
|
||||
|
||||
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = NULL;
|
||||
|
||||
@@ -167,7 +178,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
|
||||
// been destroyed, and va_space->mapping won't be used again. Still,
|
||||
// the va_space survives the inode if its destruction is deferred, in
|
||||
// which case the references are rendered stale.
|
||||
address_space_init_once(&va_space->mapping);
|
||||
address_space_init_once(va_space->mapping);
|
||||
|
||||
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
|
||||
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
|
||||
@@ -430,14 +441,12 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
|
||||
static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_processor_id_t gpu_id;
|
||||
bool make_zombie = false;
|
||||
|
||||
if (current->mm != NULL)
|
||||
uvm_record_lock_mmap_lock_write(current->mm);
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
// current->mm will be NULL on process teardown, in which case we have
|
||||
// special handling.
|
||||
if (current->mm == NULL) {
|
||||
@@ -467,13 +476,11 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
|
||||
uvm_destroy_vma_managed(vma, make_zombie);
|
||||
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
|
||||
// that can be attributed to new VA ranges reallocated at the same address
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
|
||||
gpu_va_space->needs_fault_buffer_flush = true;
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to
|
||||
// avoid finding stale entries that can be attributed to new VA ranges
|
||||
// reallocated at the same address.
|
||||
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
|
||||
}
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
@@ -489,139 +496,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
|
||||
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_block_t *va_block;
|
||||
NvU64 fault_addr = nv_page_fault_va(vmf);
|
||||
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
bool tools_enabled;
|
||||
bool major_fault = false;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_global_processor_mask_t gpus_to_check_for_ecc;
|
||||
|
||||
if (status != NV_OK)
|
||||
goto convert_error;
|
||||
|
||||
// TODO: Bug 2583279: Lock tracking is disabled for the power management
|
||||
// lock in order to suppress reporting of a lock policy violation.
|
||||
// The violation consists in acquiring the power management lock multiple
|
||||
// times, and it is manifested as an error during release. The
|
||||
// re-acquisition of the power management locks happens upon re-entry in the
|
||||
// UVM module, and it is benign on itself, but when combined with certain
|
||||
// power management scenarios, it is indicative of a potential deadlock.
|
||||
// Tracking will be re-enabled once the power management locking strategy is
|
||||
// modified to avoid deadlocks.
|
||||
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
goto convert_error;
|
||||
}
|
||||
|
||||
service_context = uvm_service_block_context_cpu_alloc();
|
||||
if (!service_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
service_context->cpu_fault.wakeup_time_stamp = 0;
|
||||
|
||||
// The mmap_lock might be held in write mode, but the mode doesn't matter
|
||||
// for the purpose of lock ordering and we don't rely on it being in write
|
||||
// anywhere so just record it as read mode in all cases.
|
||||
uvm_record_lock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
do {
|
||||
bool do_sleep = false;
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
NvU64 now = NV_GETTIME();
|
||||
if (now < service_context->cpu_fault.wakeup_time_stamp)
|
||||
do_sleep = true;
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
// Drop the VA space lock while we sleep
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// usleep_range is preferred because msleep has a 20ms granularity
|
||||
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
|
||||
// timers and, by adding a range, the Linux scheduler may coalesce
|
||||
// our wakeup with others, thus saving some interrupts.
|
||||
if (do_sleep) {
|
||||
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
|
||||
|
||||
usleep_range(nap_us, nap_us + nap_us / 2);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
|
||||
break;
|
||||
}
|
||||
|
||||
// Watch out, current->mm might not be vma->vm_mm
|
||||
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
|
||||
|
||||
// Loop until thrashing goes away.
|
||||
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
|
||||
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UvmEventFatalReason reason;
|
||||
|
||||
reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
|
||||
}
|
||||
|
||||
tools_enabled = va_space->tools.enabled;
|
||||
|
||||
if (status == NV_OK) {
|
||||
uvm_va_space_global_gpus_in_mask(va_space,
|
||||
&gpus_to_check_for_ecc,
|
||||
&service_context->cpu_fault.gpus_to_check_for_ecc);
|
||||
uvm_global_mask_retain(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
|
||||
uvm_global_mask_release(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
if (tools_enabled)
|
||||
uvm_tools_flush_events();
|
||||
|
||||
// Major faults involve I/O in order to resolve the fault.
|
||||
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
|
||||
// A process can also get statistics for major and minor faults by calling readproc().
|
||||
major_fault = service_context->cpu_fault.did_migrate;
|
||||
uvm_service_block_context_cpu_free(service_context);
|
||||
|
||||
unlock:
|
||||
// TODO: Bug 2583279: See the comment above the matching lock acquisition
|
||||
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
|
||||
|
||||
convert_error:
|
||||
switch (status) {
|
||||
case NV_OK:
|
||||
case NV_ERR_BUSY_RETRY:
|
||||
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
|
||||
case NV_ERR_NO_MEMORY:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
|
||||
}
|
||||
|
||||
|
||||
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
|
||||
@@ -752,7 +630,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
|
||||
|
||||
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
int ret = 0;
|
||||
@@ -761,8 +639,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_initialized(va_space);
|
||||
if (status != NV_OK)
|
||||
va_space = uvm_fd_va_space(filp);
|
||||
if (!va_space)
|
||||
return -EBADFD;
|
||||
|
||||
// When the VA space is associated with an mm, all vmas under the VA space
|
||||
@@ -814,7 +692,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
// Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
|
||||
// so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
|
||||
// with VM_IO, but that causes other mapping issues.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
|
||||
// Make the default behavior be VM_DONTCOPY to avoid the performance impact
|
||||
// of removing CPU mappings in the parent on fork()+exec(). Users can call
|
||||
// madvise(MDV_DOFORK) if the child process requires access to the
|
||||
// allocation.
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY);
|
||||
|
||||
vma->vm_ops = &uvm_vm_ops_managed;
|
||||
|
||||
@@ -881,7 +763,53 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
|
||||
|
||||
static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
{
    return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
    uvm_va_space_t *va_space;
    NV_STATUS status;
    uvm_fd_type_t old_fd_type;

    // Normally we expect private_data == UVM_FD_UNINITIALIZED. However multiple
    // threads may call this ioctl concurrently so we have to be careful to
    // avoid initializing multiple va_spaces and/or leaking memory. To do this
    // we do an atomic compare and swap. Only one thread will observe
    // UVM_FD_UNINITIALIZED and that thread will allocate and setup the
    // va_space.
    //
    // Other threads will either see UVM_FD_INITIALIZING or UVM_FD_VA_SPACE. In
    // the case of UVM_FD_VA_SPACE we return success if and only if the
    // initialization flags match. If another thread is still initializing the
    // va_space we return NV_ERR_BUSY_RETRY.
    //
    // If va_space initialization fails we return the failure code and reset the
    // FD state back to UVM_FD_UNINITIALIZED to allow another initialization
    // attempt to be made. This is safe because other threads will have only had
    // a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
    // case.
    old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                         UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type == UVM_FD_UNINITIALIZED) {
        status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
        if (status != NV_OK) {
            atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
            return status;
        }

        atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
    }
    else if (old_fd_type == UVM_FD_VA_SPACE) {
        va_space = uvm_va_space_get(filp);

        if (params->flags != va_space->initialization_flags)
            status = NV_ERR_INVALID_ARGUMENT;
        else
            status = NV_OK;
    }
    else {
        UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
        status = NV_ERR_BUSY_RETRY;
    }

    return status;
}
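For illustration, the same claim-initialize-publish pattern expressed with portable C11 atomics. This is a generic sketch under assumed names, not the UVM implementation: one thread wins the compare-exchange, builds the object, then publishes a tagged pointer; others observe either the busy marker or the published value.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdlib.h>

    enum { FD_UNINITIALIZED = 0, FD_INITIALIZING = 1, FD_OBJECT = 2, FD_TYPE_MASK = 3 };

    static _Atomic uintptr_t slot = FD_UNINITIALIZED;

    /* Returns 0 on success, -1 if initialization failed (state is reset so a
     * later attempt can retry), and 1 if another thread is still initializing. */
    static int init_once(void)
    {
        uintptr_t expected = FD_UNINITIALIZED;

        if (atomic_compare_exchange_strong(&slot, &expected, FD_INITIALIZING)) {
            void *obj = aligned_alloc(4, 64);   /* alignment keeps the low tag bits free */
            if (obj == NULL) {
                atomic_store(&slot, FD_UNINITIALIZED);
                return -1;
            }
            atomic_store(&slot, (uintptr_t)obj | FD_OBJECT);
            return 0;
        }

        return ((expected & FD_TYPE_MASK) == FD_INITIALIZING) ? 1 : 0;
    }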
|
||||
|
||||
static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
|
||||
@@ -978,16 +906,9 @@ static const struct file_operations uvm_fops =
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
|
||||
{
|
||||
long ret;
|
||||
int write = 1;
|
||||
int force = 0;
|
||||
struct page *page;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@@ -998,7 +919,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
// are not used because unload_state_buf may be a managed memory pointer and
|
||||
// therefore a locking assertion from the CPU fault handler could be fired.
|
||||
nv_mmap_read_lock(current->mm);
|
||||
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
|
||||
nv_mmap_read_unlock(current->mm);
|
||||
|
||||
if (ret < 0)
|
||||
@@ -1008,7 +929,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
put_page(page);
|
||||
NV_UNPIN_USER_PAGE(page);
|
||||
status = NV_ERR_IN_USE;
|
||||
goto error;
|
||||
}
|
||||
@@ -1027,7 +948,7 @@ static void uvm_test_unload_state_exit(void)
|
||||
{
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
kunmap(g_uvm_global.unload_state.page);
|
||||
put_page(g_uvm_global.unload_state.page);
|
||||
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -211,12 +211,12 @@ NV_STATUS UvmDeinitialize(void);
|
||||
// UvmReopen
|
||||
//
|
||||
// Reinitializes the UVM driver after checking for minimal user-mode state.
|
||||
// Before calling this function, all GPUs must be unregistered with
|
||||
// Before calling this function, all GPUs must be unregistered with
|
||||
// UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree().
|
||||
// Note that it is not required to release VA ranges that were reserved with
|
||||
// UvmReserveVa().
|
||||
//
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
|
||||
// replaces it with a new open file with the same name.
|
||||
//
|
||||
// Arguments:
|
||||
@@ -1746,17 +1746,20 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// GPUs. The external allocation can be unmapped from a specific GPU using
|
||||
// UvmUnmapExternal or from all GPUs using UvmFree.
|
||||
//
|
||||
// The virtual address range specified by (base, length) must be aligned to the
|
||||
// allocation's physical page size and must fall within a VA range previously
|
||||
// created with UvmCreateExternalRange. A GPU VA space must have been registered
|
||||
// for each GPU in the list. The offset in the physical allocation at which the
|
||||
// allocation must be mapped should also be aligned to the allocation's physical
|
||||
// page size. The (base, length) range must lie within the largest possible
|
||||
// virtual address supported by the specified GPUs.
|
||||
// The virtual address range specified by (base, length) must fall within a VA
|
||||
// range previously created with UvmCreateExternalRange. A GPU VA space must
|
||||
// have been registered for each GPU in the list. The (base, length) range must
|
||||
// lie within the largest possible virtual address supported by the specified
|
||||
// GPUs.
|
||||
//
|
||||
// The page size used for the mapping is the largest supported page size less
|
||||
// than or equal to the alignments of base, length, offset, and the allocation
|
||||
// page size.
|
||||
//
|
||||
// If the range specified by (base, length) falls within any existing mappings,
|
||||
// the behavior is the same as if UvmUnmapExternal with the range specified by
|
||||
// (base, length) had been called first.
|
||||
// (base, length) had been called first, provided that base and length are
|
||||
// aligned to the page size used for the existing one.
|
||||
//
|
||||
// If the allocation resides in GPU memory, that GPU must have been registered
|
||||
// via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
|
||||
@@ -1838,8 +1841,9 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// - The requested address range does not fall entirely within an
|
||||
// existing external VA range created with a single call to
|
||||
// UvmCreateExternalRange.
|
||||
// - At least one of base and length is not aligned to the allocation's
|
||||
// physical page size.
|
||||
// - The mapping page size allowed by the alignments of base, length,
|
||||
// and offset is smaller than the minimum supported page size on the
|
||||
// GPU.
|
||||
// - base or base + length fall within an existing mapping but are not
|
||||
// aligned to that mapping's page size.
|
||||
//
|
||||
@@ -1848,8 +1852,7 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// address supported by one or more of the specified GPUs.
|
||||
//
|
||||
// NV_ERR_INVALID_OFFSET:
|
||||
// offset is not aligned to the allocation's physical page size or
|
||||
// offset+length exceeds the allocation size.
|
||||
// - offset+length exceeds the allocation size.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// One of the following occurred:
|
||||
@@ -3758,6 +3761,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// Read spans more than a single target process allocation.
|
||||
//
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
|
||||
void *buffer,
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "clc7b5.h"
|
||||
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
|
||||
|
||||
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
@@ -112,7 +112,7 @@ NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
|
||||
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
NvU64 push_begin_gpu_va;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
@@ -183,7 +183,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
#include "clc56f.h"
|
||||
#include "clc076.h"
|
||||
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
@@ -82,7 +82,7 @@ bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#define __UVM_API_H__
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_ioctl.h"
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_lock.h"
|
||||
@@ -51,8 +52,10 @@
|
||||
\
|
||||
params.rmStatus = uvm_global_get_status(); \
|
||||
if (params.rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params.rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params.rmStatus == NV_OK)) \
|
||||
params.rmStatus = function_name(¶ms, filp); \
|
||||
} \
|
||||
@@ -88,8 +91,10 @@
|
||||
\
|
||||
params->rmStatus = uvm_global_get_status(); \
|
||||
if (params->rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params->rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params->rmStatus == NV_OK)) \
|
||||
params->rmStatus = function_name(params, filp); \
|
||||
} \
|
||||
|
||||
@@ -25,9 +25,62 @@
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"

// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
// these experimental parameters. These are intended to help guide that policy.
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
                 "Max order of pages (2^N) to prefetch on replayable ATS faults");

static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
                 "Max order of pages (2^N) to prefetch on non-replayable ATS faults");

// Expand the fault region to the naturally-aligned region with order given by
// the module parameters, clamped to the vma containing fault_addr (if any).
// Note that this means the region contains fault_addr but may not begin at
// fault_addr.
static void expand_fault_region(struct mm_struct *mm,
                                NvU64 fault_addr,
                                uvm_fault_client_type_t client_type,
                                unsigned long *start,
                                unsigned long *size)
{
    struct vm_area_struct *vma;
    unsigned int order;
    unsigned long outer, aligned_start, aligned_size;

    *start = fault_addr;
    *size = PAGE_SIZE;

    if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
        order = uvm_exp_perf_prefetch_ats_order_non_replayable;
    else
        order = uvm_exp_perf_prefetch_ats_order_replayable;

    if (order == 0)
        return;

    vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
    if (!vma)
        return;

    UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);

    aligned_size = (1UL << order) * PAGE_SIZE;

    aligned_start = fault_addr & ~(aligned_size - 1);

    *start = max(vma->vm_start, aligned_start);
    outer = min(vma->vm_end, aligned_start + aligned_size);
    *size = outer - *start;
}
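A worked example of the expansion above, with illustrative values and assuming 4 KiB pages:

    // order = 4                     -> aligned_size  = 16 * 4 KiB = 64 KiB
    // fault_addr = 0x7f1234567000   -> aligned_start = 0x7f1234560000
    // a vma spanning [0x7f1234562000, 0x7f1234580000) clamps the region to
    //   *start = 0x7f1234562000, outer = 0x7f1234570000, *size = 56 KiB,
    // which still contains fault_addr but no longer begins at it.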
|
||||
|
||||
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_access_type_t access_type)
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_fault_client_type_t client_type)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
@@ -66,8 +119,6 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = fault_addr,
|
||||
.length = PAGE_SIZE,
|
||||
.dst_id = gpu_va_space->gpu->parent->id,
|
||||
.dst_node_id = -1,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
@@ -79,6 +130,8 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
|
||||
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
//
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
@@ -131,7 +184,10 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
}
|
||||
else {
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
service_access_type,
|
||||
current_entry->fault_source.client_type);
|
||||
}
|
||||
|
||||
// Do not flag prefetch faults as fatal unless something fatal happened
|
||||
@@ -155,7 +211,8 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
UVM_FAULT_ACCESS_TYPE_READ);
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
current_entry->fault_source.client_type);
|
||||
|
||||
// If read accesses are also invalid, cancel the fault. If a
|
||||
// different error code is returned, exit
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "uvm_channel.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_tracker.h"
|
||||
@@ -655,9 +656,11 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
|
||||
|
||||
if (!skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "uvm_channel.h"
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_procfs.h"
|
||||
@@ -68,6 +69,30 @@ typedef enum
|
||||
UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
|
||||
} uvm_channel_update_mode_t;
|
||||
|
||||
static void channel_pool_lock_init(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
|
||||
else
|
||||
uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
|
||||
}
|
||||
|
||||
static void channel_pool_lock(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
else
|
||||
uvm_spin_lock(&pool->spinlock);
|
||||
}
|
||||
|
||||
static void channel_pool_unlock(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_unlock(&pool->mutex);
|
||||
else
|
||||
uvm_spin_unlock(&pool->spinlock);
|
||||
}
|
||||
|
||||
// Update channel progress, completing up to max_to_complete entries
|
||||
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 max_to_complete,
|
||||
@@ -80,12 +105,14 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
// Completed value should never exceed the queued value
|
||||
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
|
||||
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
|
||||
channel->pool->manager->gpu->parent->name, channel->name, completed_value,
|
||||
channel->pool->manager->gpu->parent->name,
|
||||
channel->name,
|
||||
completed_value,
|
||||
channel->tracking_sem.queued_value);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
@@ -108,7 +135,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
channel->gpu_get = gpu_get;
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
if (cpu_put >= gpu_get)
|
||||
pending_gpfifos = cpu_put - gpu_get;
|
||||
@@ -121,7 +148,8 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 uvm_channel_update_progress(uvm_channel_t *channel)
|
||||
{
|
||||
// By default, don't complete too many entries at a time to spread the cost
|
||||
// of doing so across callers and avoid holding a spin lock for too long.
|
||||
// of doing so across callers and avoid potentially holding a spin lock for
|
||||
// too long.
|
||||
return uvm_channel_update_progress_with_max(channel, 8, UVM_CHANNEL_UPDATE_MODE_COMPLETED);
|
||||
}
|
||||
|
||||
@@ -153,70 +181,95 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
|
||||
return pending_gpfifos;
|
||||
}
|
||||
|
||||
static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
static NvU32 channel_get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
NvU32 available = channel->num_gpfifo_entries;
|
||||
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
// Remove sentinel entry
|
||||
available -= 1;
|
||||
|
||||
// Remove entries of ongoing pushes
|
||||
available -= channel->current_gpfifo_count;
|
||||
|
||||
// Remove pending entries
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
available -= (channel->cpu_put - channel->gpu_get);
|
||||
else
|
||||
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
|
||||
available -= (channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get);
|
||||
|
||||
return (pending_entries + channel->current_gpfifo_count + num_gpfifo_entries < channel->num_gpfifo_entries);
|
||||
UVM_ASSERT(available < channel->num_gpfifo_entries);
|
||||
|
||||
return available;
|
||||
}
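As a worked example of the accounting above (hypothetical values): with num_gpfifo_entries = 32, current_gpfifo_count = 3, cpu_put = 10 and gpu_get = 30, the pending count wraps to 10 + 32 - 30 = 12 entries, so available = 32 - 1 (sentinel) - 3 - 12 = 16.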
|
||||
|
||||
static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 available;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
available = channel_get_available_gpfifo_entries(channel);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
bool claimed = false;
|
||||
|
||||
UVM_ASSERT(num_gpfifo_entries > 0);
|
||||
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (channel_is_available(channel, num_gpfifo_entries)) {
|
||||
if (channel_get_available_gpfifo_entries(channel) >= num_gpfifo_entries) {
|
||||
channel->current_gpfifo_count += num_gpfifo_entries;
|
||||
claimed = true;
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
return claimed;
|
||||
}
|
||||
|
||||
static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
bool claimed;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
claimed = try_claim_channel_locked(channel, num_gpfifo_entries);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return claimed;
|
||||
}
|
||||
|
||||
-static void lock_push(uvm_channel_t *channel)
+static void unlock_channel_for_push(uvm_channel_t *channel)
 {
 }

-static void unlock_push(uvm_channel_t *channel)
+static bool is_channel_locked_for_push(uvm_channel_t *channel)
 {
-}
-
-static bool trylock_push(uvm_channel_t *channel)
-{
+    // For CE and proxy channels, we always return that the channel is locked,
+    // which has no functional impact in the UVM channel code-flow, this is only
+    // used on UVM_ASSERTs.
     return true;
 }

-// Reserve a channel in the specified pool
-static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
+// Reserve a channel in the specified CE pool
+static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
 {
     uvm_channel_t *channel;
     uvm_spin_loop_t spin;

+    UVM_ASSERT(pool);
+    UVM_ASSERT(uvm_channel_pool_is_ce(pool));
+
     uvm_for_each_channel_in_pool(channel, pool) {
         // TODO: Bug 1764953: Prefer idle/less busy channels
-        if (trylock_push(channel)) {
-            if (try_claim_channel(channel, 1)) {
-                *channel_out = channel;
-                return NV_OK;
-            }
-            else {
-                unlock_push(channel);
-            }
+        if (try_claim_channel(channel, 1)) {
+            *channel_out = channel;
+            return NV_OK;
         }
     }

@@ -228,7 +281,6 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
lock_push(channel);
|
||||
*channel_out = channel;
|
||||
|
||||
return NV_OK;
|
||||
@@ -248,8 +300,12 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
|
||||
{
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
|
||||
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
|
||||
|
||||
UVM_ASSERT(pool != NULL);
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
@@ -265,7 +321,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
|
||||
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
return channel_reserve_in_pool(pool, channel_out);
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
|
||||
@@ -289,14 +345,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_info_t *push_info;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
|
||||
UVM_ASSERT(push_info != NULL);
|
||||
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
|
||||
list_del(&push_info->available_list_node);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return push_info - channel->push_infos;
|
||||
}
|
||||
@@ -311,6 +367,8 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
|
||||
manager = channel->pool->manager;
|
||||
|
||||
UVM_ASSERT(is_channel_locked_for_push(channel));
|
||||
|
||||
status = uvm_pushbuffer_begin_push(manager->pushbuffer, push);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@@ -355,10 +413,6 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_proxy(channel));
|
||||
|
||||
// nvUvmInterfacePagingChannelPushStream should not sleep, because a
|
||||
// spinlock is currently held.
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
|
||||
status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);
|
||||
|
||||
if (status != NV_OK) {
|
||||
@@ -409,7 +463,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
new_tracking_value = ++channel->tracking_sem.queued_value;
|
||||
new_payload = (NvU32)new_tracking_value;
|
||||
@@ -446,8 +500,8 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
|
||||
// to be called first.
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
unlock_push(channel);
|
||||
unlock_channel_for_push(channel);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
@@ -470,7 +524,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
NvU32 new_cpu_put;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
|
||||
@@ -504,9 +558,10 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
|
||||
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that.
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
unlock_push(channel);
|
||||
// push must be updated before that. Note that we do not call
|
||||
// unlock_channel_for_push() because a control GPFIFO is followed by a
|
||||
// semaphore release, where the channel is unlocked.
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
@@ -563,7 +618,7 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
if (try_claim_channel(channel, num_gpfifo_entries))
|
||||
goto out;
|
||||
return NV_OK;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
@@ -574,10 +629,6 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
uvm_channel_update_progress(channel);
|
||||
}
|
||||
|
||||
out:
|
||||
if (status == NV_OK)
|
||||
lock_push(channel);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -591,12 +642,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
if (pending_count == 0)
|
||||
return NULL;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->gpu_get != channel->cpu_put)
|
||||
entry = &channel->gpfifo_entries[channel->gpu_get];
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return entry;
|
||||
}
|
||||
@@ -720,9 +771,9 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
|
||||
}
|
||||
|
||||
uvm_procfs_destroy_entry(channel->procfs.pushes);
|
||||
uvm_procfs_destroy_entry(channel->procfs.info);
|
||||
uvm_procfs_destroy_entry(channel->procfs.dir);
|
||||
proc_remove(channel->procfs.pushes);
|
||||
proc_remove(channel->procfs.info);
|
||||
proc_remove(channel->procfs.dir);
|
||||
|
||||
uvm_kvfree(channel->push_acquire_infos);
|
||||
uvm_kvfree(channel->push_infos);
|
||||
@@ -750,18 +801,17 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
uvm_gpu_t *gpu = manager->gpu;
|
||||
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
}
|
||||
|
||||
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
|
||||
channel_alloc_params.numGpFifoEntries = manager->conf.num_gpfifo_entries;
|
||||
channel_alloc_params.gpFifoLoc = manager->conf.gpfifo_loc;
|
||||
channel_alloc_params.gpPutLoc = manager->conf.gpput_loc;
|
||||
channel_alloc_params.engineIndex = engine_index;
|
||||
|
||||
if (uvm_channel_is_ce(channel))
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
|
||||
}
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space,
|
||||
&channel_alloc_params,
|
||||
@@ -893,7 +943,7 @@ NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaph
|
||||
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, uvm_channel_is_proxy(access_channel));
|
||||
}
|
||||
|
||||
static NV_STATUS init_channel(uvm_channel_t *channel)
|
||||
static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
@@ -977,9 +1027,10 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
pool->engine_index = engine_index;
|
||||
pool->pool_type = pool_type;
|
||||
|
||||
uvm_spin_lock_init(&pool->lock, UVM_LOCK_ORDER_CHANNEL);
|
||||
channel_pool_lock_init(pool);
|
||||
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
pool->channels = uvm_kvmalloc_zero(sizeof(*pool->channels) * num_channels);
|
||||
if (!pool->channels)
|
||||
@@ -994,7 +1045,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
status = init_channel(channel);
|
||||
status = channel_init(channel);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
@@ -1374,15 +1425,55 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
manager->conf.gpput_loc = string_to_buffer_location(gpput_loc_value);
|
||||
}
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as the
|
||||
// preferred CE for any type, because as more information is discovered (for
|
||||
// example, a pair of peer GPUs is added) we may start using the previously idle
|
||||
// channels.
|
||||
// Returns the maximum number of pools that are needed in the current
|
||||
// configuration. The implementation may choose to create a smaller number of
|
||||
// pools.
|
||||
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
unsigned num_channel_pools;
|
||||
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
// Create one CE channel pool per usable CE
|
||||
num_channel_pools = num_used_ce;
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
return num_channel_pools;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
unsigned ce;
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as
|
||||
// the preferred CE for any type, because as more information is discovered
|
||||
// (for example, a pair of peer GPUs is added) we may start using the
|
||||
// previously idle pools.
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
NV_STATUS status;
|
||||
unsigned type;
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
if (preferred_ce[type] == ce)
|
||||
manager->pool_to_use.default_for_type[type] = pool;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
NV_STATUS status;
|
||||
unsigned ce, type;
|
||||
unsigned num_channel_pools;
|
||||
uvm_channel_type_t type;
|
||||
unsigned max_channel_pools;
|
||||
unsigned preferred_ce[UVM_CHANNEL_TYPE_CE_COUNT];
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
@@ -1393,36 +1484,21 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// CE channel pools
|
||||
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
max_channel_pools = channel_manager_get_max_pools(manager);
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools);
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * max_channel_pools);
|
||||
if (!manager->channel_pools)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
// Assign channel types to pools
|
||||
for (type = 0; type < ARRAY_SIZE(preferred_ce); type++) {
|
||||
unsigned ce = preferred_ce[type];
|
||||
|
||||
UVM_ASSERT(test_bit(ce, manager->ce_mask));
|
||||
|
||||
manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
|
||||
}
|
||||
status = channel_manager_create_ce_pools(manager, preferred_ce);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// In SR-IOV heavy, add an additional, single-channel, pool that is
|
||||
// dedicated to the MEMOPS type.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) {
|
||||
uvm_channel_type_t channel_type = uvm_channel_proxy_channel_type();
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE_PROXY, preferred_ce[channel_type], &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@@ -1482,11 +1558,11 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
|
||||
if (channel_manager == NULL)
|
||||
return;
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.pending_pushes);
|
||||
proc_remove(channel_manager->procfs.pending_pushes);
|
||||
|
||||
channel_manager_destroy_pools(channel_manager);
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.channels_dir);
|
||||
proc_remove(channel_manager->procfs.channels_dir);
|
||||
|
||||
uvm_pushbuffer_destroy(channel_manager->pushbuffer);
|
||||
|
||||
@@ -1583,7 +1659,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
|
||||
@@ -1595,7 +1671,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
|
||||
@@ -1639,7 +1715,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
|
||||
@@ -1687,7 +1763,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)
|
||||
|
||||
@@ -50,6 +50,9 @@
|
||||
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
|
||||
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
|
||||
|
||||
// Maximum number of channels per pool.
|
||||
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL 8
|
||||
|
||||
// Semaphore payloads cannot advance too much between calls to
|
||||
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
|
||||
// are bound by gpfifo sizing as we have to update the completed value to
|
||||
@@ -61,6 +64,14 @@
|
||||
// uvm_channel.h includes uvm_gpu_semaphore.h.
|
||||
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
|
||||
|
||||
#define uvm_channel_pool_assert_locked(pool) ( \
|
||||
{ \
|
||||
if (uvm_channel_pool_is_proxy(pool)) \
|
||||
uvm_assert_mutex_locked(&(pool)->mutex); \
|
||||
else \
|
||||
uvm_assert_spinlock_locked(&(pool)->spinlock); \
|
||||
})
|
||||
|
||||
// Channel types
|
||||
typedef enum
|
||||
{
|
||||
@@ -162,8 +173,25 @@ typedef struct
|
||||
// Pool type: Refer to the uvm_channel_pool_type_t enum.
|
||||
uvm_channel_pool_type_t pool_type;
|
||||
|
||||
// Lock protecting the state of channels in the pool
|
||||
uvm_spinlock_t lock;
|
||||
// Lock protecting the state of channels in the pool.
|
||||
//
|
||||
// There are two pool lock types available: spinlock and mutex. The mutex
|
||||
// variant is required when the thread holding the pool lock must
|
||||
// sleep (ex: acquire another mutex) deeper in the call stack, either in UVM
|
||||
// or RM. For example, work submission to proxy channels in SR-IOV heavy
|
||||
// entails calling an RM API that acquires a mutex, so the proxy channel
|
||||
// pool must use the mutex variant.
|
||||
//
|
||||
// Unless the mutex is required, the spinlock is preferred. This is because,
|
||||
// other than for proxy channels, work submission takes little time and does
|
||||
// not involve any RM calls, so UVM can avoid any invocation that may result
|
||||
// on a sleep. All non-proxy channel pools use the spinlock variant, even in
|
||||
// SR-IOV heavy.
|
||||
union {
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
} uvm_channel_pool_t;
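Aside: the channel_pool_lock()/channel_pool_unlock() helpers used throughout this diff presumably pick between the two union members based on the pool type, mutex for proxy pools and spinlock for everything else. A sketch of that dispatch, with pthread primitives standing in for uvm_spinlock_t and uvm_mutex_t:

```c
#include <pthread.h>
#include <stdbool.h>

/* Illustrative stand-in for uvm_channel_pool_t's lock selection. */
struct pool {
    bool is_proxy;                    /* proxy pools may sleep in RM calls */
    union {
        pthread_spinlock_t spinlock;  /* fast path: non-proxy pools */
        pthread_mutex_t mutex;        /* proxy pools */
    };
};

static void pool_lock(struct pool *p)
{
    if (p->is_proxy)
        pthread_mutex_lock(&p->mutex);
    else
        pthread_spin_lock(&p->spinlock);
}

static void pool_unlock(struct pool *p)
{
    if (p->is_proxy)
        pthread_mutex_unlock(&p->mutex);
    else
        pthread_spin_unlock(&p->spinlock);
}
```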
|
||||
|
||||
struct uvm_channel_struct
|
||||
@@ -271,7 +299,7 @@ struct uvm_channel_manager_struct
|
||||
unsigned num_channel_pools;
|
||||
|
||||
// Mask containing the indexes of the usable Copy Engines. Each usable CE
|
||||
// has a pool associated with it, see channel_manager_ce_pool
|
||||
// has at least one pool associated with it.
|
||||
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
struct
|
||||
@@ -309,16 +337,28 @@ struct uvm_channel_manager_struct
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
return uvm_channel_pool_is_proxy(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
|
||||
return uvm_channel_pool_is_ce(channel->pool);
|
||||
}
|
||||
|
||||
// Proxy channels are used to push page tree related methods, so their channel
|
||||
@@ -435,6 +475,10 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
|
||||
|
||||
// Returns the number of available GPFIFO entries. The function internally
|
||||
// acquires the channel pool lock.
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
|
||||
|
||||
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
|
||||
|
||||
@@ -153,7 +153,6 @@ done:
|
||||
|
||||
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
@@ -168,11 +167,12 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
completed_value = uvm_channel_update_completed_value(channel);
|
||||
uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);
|
||||
|
||||
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
|
||||
TEST_NV_CHECK_RET(uvm_global_get_status());
|
||||
uvm_channel_update_progress_all(channel);
|
||||
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
|
||||
|
||||
uvm_channel_manager_destroy(gpu->channel_manager);
|
||||
|
||||
// Destruction will hit the error again, so clear one more time.
|
||||
uvm_global_reset_fatal_error();
|
||||
|
||||
@@ -743,22 +743,6 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
else
|
||||
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
|
||||
return channel->num_gpfifo_entries - pending_entries - 1;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -771,9 +755,10 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
NvU64 entry;
|
||||
uvm_push_t push;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager = gpu->channel_manager;
|
||||
gpu = manager->gpu;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
|
||||
cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
|
||||
@@ -791,6 +776,12 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
|
||||
uvm_push_end(&push);
|
||||
|
||||
// Flush all completed entries from the GPFIFO ring buffer. This test
|
||||
// requires this flush because we verify (below with
|
||||
// uvm_channel_get_available_gpfifo_entries) the number of free entries
|
||||
// in the channel.
|
||||
uvm_channel_update_progress_all(channel);
|
||||
|
||||
// Populate the remaining GPFIFO entries, leaving 2 slots available.
|
||||
// 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
|
||||
// indicate a terminal condition for the GPFIFO ringbuffer, therefore we
|
||||
@@ -800,7 +791,7 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
uvm_push_end(&push);
|
||||
}
|
||||
|
||||
TEST_CHECK_GOTO(get_available_gpfifo_entries(channel) == 2, error);
|
||||
TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);
|
||||
|
||||
// We should have room for the control GPFIFO and the subsequent
|
||||
// semaphore release.
|
||||
@@ -936,7 +927,7 @@ done:
|
||||
static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (params->iterations == 0 || params->num_streams == 0)
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
@@ -951,10 +942,7 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
params->iterations,
|
||||
params->seed,
|
||||
params->verbose);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
|
||||
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
|
||||
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
|
||||
|
||||
bool uvm_debug_prints_enabled()
|
||||
bool uvm_debug_prints_enabled(void)
|
||||
{
|
||||
return uvm_debug_prints != 0;
|
||||
}
|
||||
|
||||
@@ -347,6 +347,21 @@ typedef struct
|
||||
NvHandle user_object;
|
||||
} uvm_rm_user_object_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING,
|
||||
UVM_FD_VA_SPACE,
|
||||
UVM_FD_COUNT
|
||||
} uvm_fd_type_t;
|
||||
|
||||
// This should be large enough to fit the valid values from uvm_fd_type_t above.
|
||||
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
|
||||
// coverage tool fails due when the preprocessor expands that to a huge mess of
|
||||
// ternary operators.
|
||||
#define UVM_FD_TYPE_BITS 2
|
||||
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
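Aside: UVM_FD_TYPE_BITS and UVM_FD_TYPE_MASK suggest the fd type is carried in the low bits of `filp->private_data`, which works because the objects it points at are at least 4-byte aligned. A standalone sketch of that pointer-tagging scheme; the names are illustrative, not the driver's:

```c
#include <stdint.h>

#define FD_TYPE_BITS 2
#define FD_TYPE_MASK ((1UL << FD_TYPE_BITS) - 1)

/* Pack a small type tag into the low bits of an aligned pointer. */
static void *fd_pack(void *ptr, unsigned type)
{
    return (void *)((uintptr_t)ptr | (type & FD_TYPE_MASK));
}

/* Recover the tag and, optionally, the untagged pointer. */
static unsigned fd_unpack(void *packed, void **ptr_out)
{
    uintptr_t v = (uintptr_t)packed;

    if (ptr_out)
        *ptr_out = (void *)(v & ~(uintptr_t)FD_TYPE_MASK);
    return (unsigned)(v & FD_TYPE_MASK);
}
```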
|
||||
|
||||
// Macro used to compare two values for types that support less than operator.
|
||||
// It returns -1 if a < b, 1 if a > b and 0 if a == 0
|
||||
#define UVM_CMP_DEFAULT(a,b) \
|
||||
@@ -369,6 +384,10 @@ typedef struct
|
||||
// file. A NULL input returns false.
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp);
|
||||
|
||||
// Returns the type of data filp->private_data contains to and if ptr_val !=
|
||||
// NULL returns the value of the pointer.
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);
|
||||
|
||||
// Reads the first word in the supplied struct page.
|
||||
static inline void uvm_touch_page(struct page *page)
|
||||
{
|
||||
|
||||
@@ -28,6 +28,8 @@ typedef struct uvm_global_struct uvm_global_t;
|
||||
|
||||
typedef struct uvm_gpu_struct uvm_gpu_t;
|
||||
typedef struct uvm_parent_gpu_struct uvm_parent_gpu_t;
|
||||
typedef struct uvm_gpu_chunk_struct uvm_gpu_chunk_t;
|
||||
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
|
||||
typedef struct uvm_rm_mem_struct uvm_rm_mem_t;
|
||||
typedef struct uvm_mem_struct uvm_mem_t;
|
||||
typedef struct uvm_host_hal_struct uvm_host_hal_t;
|
||||
@@ -56,6 +58,7 @@ typedef struct uvm_va_range_struct uvm_va_range_t;
|
||||
typedef struct uvm_va_block_struct uvm_va_block_t;
|
||||
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
|
||||
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
|
||||
typedef struct uvm_va_block_retry_struct uvm_va_block_retry_t;
|
||||
typedef struct uvm_va_space_struct uvm_va_space_t;
|
||||
typedef struct uvm_va_space_mm_struct uvm_va_space_mm_t;
|
||||
|
||||
|
||||
@@ -186,12 +186,21 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
|
||||
g_uvm_global.parent_gpus[gpu_index] = NULL;
|
||||
}
|
||||
|
||||
// Get a parent gpu by its id.
|
||||
// Returns a pointer to the parent GPU object, or NULL if not found.
|
||||
//
|
||||
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
|
||||
// retained at least one of the child GPUs.
|
||||
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
|
||||
{
|
||||
return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
|
||||
}
|
||||
|
||||
// Get a gpu by its global id.
|
||||
// Returns a pointer to the GPU object, or NULL if not found.
|
||||
//
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -94,8 +94,6 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
return UVM_GPU_LINK_NVLINK_3;
|
||||
case UVM_LINK_TYPE_NVLINK_4:
|
||||
return UVM_GPU_LINK_NVLINK_4;
|
||||
case UVM_LINK_TYPE_C2C:
|
||||
return UVM_GPU_LINK_C2C;
|
||||
default:
|
||||
return UVM_GPU_LINK_INVALID;
|
||||
}
|
||||
@@ -210,27 +208,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
|
||||
return parent_gpu->rm_info.subdeviceCount == 1;
|
||||
}
|
||||
|
||||
static bool parent_gpu_uses_canonical_form_address(uvm_parent_gpu_t *parent_gpu)
|
||||
static bool platform_uses_canonical_form_address(void)
|
||||
{
|
||||
NvU64 gpu_addr_shift;
|
||||
NvU64 cpu_addr_shift;
|
||||
|
||||
// PPC64LE doesn't use canonical form addresses.
|
||||
if (NVCPU_IS_PPC64LE)
|
||||
return false;
|
||||
|
||||
// We use big_page_size as UVM_PAGE_SIZE_64K because num_va_bits() is
|
||||
// big_page_size invariant in the MMU HAL.
|
||||
UVM_ASSERT(!parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K) ||
|
||||
(parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits() ==
|
||||
parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K)->num_va_bits()));
|
||||
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
|
||||
// Refer to the comments and diagram in uvm_gpu.c:uvm_gpu_can_address().
|
||||
return gpu_addr_shift >= cpu_addr_shift;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
@@ -239,6 +222,9 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// the canonical address form.
|
||||
NvU64 max_va_lower;
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
NvU8 addr_shift;
|
||||
|
||||
// Watch out for calling this too early in init
|
||||
UVM_ASSERT(gpu->address_space_tree.hal);
|
||||
@@ -246,6 +232,10 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
UVM_ASSERT(addr <= addr_end);
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
addr_shift = gpu_addr_shift;
|
||||
|
||||
// Pascal+ GPUs are capable of accessing kernel pointers in various modes
|
||||
// by applying the same upper-bit checks that x86, ARM, and Power
|
||||
// processors do. x86 and ARM use canonical form addresses. For ARM, even
|
||||
@@ -255,13 +245,15 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
|
||||
// (C) regions are only accessible by the CPU. Similarly, (G) regions
|
||||
// are only accessible by the GPU. (X) regions are not addressible.
|
||||
// Note that we only consider (V) regions, i.e., address ranges that are
|
||||
// addressable by both, the CPU and GPU.
|
||||
//
|
||||
// GPU MAX VA < CPU MAX VA GPU MAX VA >= CPU MAX VA
|
||||
// 0xF..F +----------------+ 0xF..F +----------------+
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| CPU MIN UPPER VA|----------------|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// GPU MIN UPPER VA|----------------| CPU MIN UPPER VA|----------------|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// CPU MIN UPPER VA|----------------| GPU MIN UPPER VA|----------------|
|
||||
@@ -270,32 +262,83 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// CPU MAX LOWER VA|----------------| GPU MAX LOWER VA|----------------|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// GPU MAX VA|----------------| CPU MAX LOWER VA|----------------|
|
||||
// GPU MAX LOWER VA|----------------| CPU MAX LOWER VA|----------------|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// 0 +----------------+ 0 +----------------+
|
||||
|
||||
if (parent_gpu_uses_canonical_form_address(gpu->parent)) {
|
||||
NvU64 min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - gpu->address_space_tree.hal->num_va_bits()));
|
||||
max_va_lower = 1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1);
|
||||
// On canonical form address platforms and Pascal+ GPUs.
|
||||
if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
|
||||
NvU64 min_va_upper;
|
||||
|
||||
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
|
||||
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
|
||||
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
|
||||
// See more details on uvm_parent_gpu_canonical_address(..);
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
else if (gpu_addr_shift == 57)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
|
||||
min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
|
||||
max_va_lower = 1ULL << (addr_shift - 1);
|
||||
return (addr_end < max_va_lower) || (addr >= min_va_upper);
|
||||
}
|
||||
else {
|
||||
max_va_lower = 1ULL << gpu->address_space_tree.hal->num_va_bits();
|
||||
max_va_lower = 1ULL << addr_shift;
|
||||
return addr_end < max_va_lower;
|
||||
}
|
||||
}
|
||||
|
||||
// The internal UVM VAS does not use canonical form addresses.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
{
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU64 max_gpu_va;
|
||||
|
||||
// Watch out for calling this too early in init
|
||||
UVM_ASSERT(gpu->address_space_tree.hal);
|
||||
UVM_ASSERT(gpu->address_space_tree.hal->num_va_bits() < 64);
|
||||
UVM_ASSERT(addr <= addr_end);
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
max_gpu_va = 1ULL << gpu->address_space_tree.hal->num_va_bits();
|
||||
return addr_end < max_gpu_va;
|
||||
}
|
||||
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
|
||||
{
|
||||
NvU32 gpu_va_bits;
|
||||
NvU32 shift;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
NvU8 addr_shift;
|
||||
NvU64 input_addr = addr;
|
||||
|
||||
if (parent_gpu_uses_canonical_form_address(parent_gpu)) {
|
||||
gpu_va_bits = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
shift = 64 - gpu_va_bits;
|
||||
addr = (NvU64)((NvS64)(addr << shift) >> shift);
|
||||
if (platform_uses_canonical_form_address()) {
|
||||
// When the CPU VA width is larger than GPU's, it means that:
|
||||
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
|
||||
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
|
||||
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
|
||||
// behavior of CPUs with smaller (than GPU) VA widths.
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
else if (gpu_addr_shift == 57)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
|
||||
addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
|
||||
|
||||
// This protection acts on when the address is not covered by the GPU's
|
||||
// OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
|
||||
// permissive (NO_CHECK) mode.
|
||||
if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
|
||||
return input_addr;
|
||||
}
|
||||
|
||||
return addr;
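Aside: to make the shift arithmetic above concrete, the small standalone program below sign-extends an address at a given VA width and prints the canonical-form bounds used by uvm_gpu_can_address(). addr_shift = 48 corresponds to the common x86-64 4-level paging case and 49 to a Pascal-Ada class GPU, but the numbers are only examples:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Sign-extend addr from addr_shift bits to a canonical 64-bit address. */
static uint64_t canonical(uint64_t addr, unsigned addr_shift)
{
    unsigned s = 64 - addr_shift;
    return (uint64_t)((int64_t)(addr << s) >> s);
}

int main(void)
{
    unsigned addr_shift = 48;
    uint64_t max_va_lower = 1ULL << (addr_shift - 1);
    uint64_t min_va_upper = (uint64_t)(INT64_MIN >> (64 - addr_shift));

    /* Prints 0x800000000000 and 0xffff800000000000: the canonical hole
     * spans [max_va_lower, min_va_upper). */
    printf("max_va_lower = 0x%" PRIx64 "\n", max_va_lower);
    printf("min_va_upper = 0x%" PRIx64 "\n", min_va_upper);

    /* With a 49-bit VA width, an address with bit 48 set sign-extends
     * into the upper canonical half: prints 0xffff000000000000. */
    printf("0x%" PRIx64 "\n", canonical(0x0001000000000000ULL, 49));
    return 0;
}
```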
|
||||
@@ -351,7 +394,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 6);
|
||||
|
||||
switch (link_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
|
||||
@@ -360,7 +403,6 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -694,7 +736,7 @@ static NV_STATUS init_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.dir);
|
||||
proc_remove(parent_gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -722,8 +764,8 @@ static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.access_counters_file);
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.fault_stats_file);
|
||||
proc_remove(parent_gpu->procfs.access_counters_file);
|
||||
proc_remove(parent_gpu->procfs.fault_stats_file);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
@@ -774,9 +816,9 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
// The kernel waits on readers to finish before returning from those calls
|
||||
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_peers);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_symlink);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir);
|
||||
proc_remove(gpu->procfs.dir_peers);
|
||||
proc_remove(gpu->procfs.dir_symlink);
|
||||
proc_remove(gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
@@ -790,15 +832,15 @@ static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
|
||||
static void deinit_procfs_files(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.info_file);
|
||||
proc_remove(gpu->procfs.info_file);
|
||||
}
|
||||
|
||||
static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[1]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_file[1]);
|
||||
}
|
||||
|
||||
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
@@ -866,6 +908,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_parent_gpu_t **parent_gpu_out)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
NV_STATUS status;
|
||||
|
||||
parent_gpu = uvm_kvmalloc_zero(sizeof(*parent_gpu));
|
||||
if (!parent_gpu)
|
||||
@@ -882,11 +925,14 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
|
||||
uvm_rb_tree_init(&parent_gpu->tsg_table);
|
||||
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&parent_gpu->lazy_free_q, "vidmem lazy free"));
|
||||
|
||||
nv_kref_init(&parent_gpu->gpu_kref);
|
||||
|
||||
*parent_gpu_out = parent_gpu;
|
||||
|
||||
return NV_OK;
|
||||
return status;
|
||||
}
|
||||
|
||||
// Allocates a uvm_gpu_t struct and initializes the basic fields and leaves all
|
||||
@@ -1539,6 +1585,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
|
||||
UVM_ASSERT(parent_gpu->num_retained_gpus == 0);
|
||||
UVM_ASSERT(bitmap_empty(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS));
|
||||
|
||||
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
|
||||
|
||||
for (sub_processor_index = 0; sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
|
||||
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
|
||||
|
||||
@@ -2165,12 +2213,9 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(p2p_caps_params->p2pLink != UVM_LINK_TYPE_C2C);
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|| peer_caps->link_type == UVM_GPU_LINK_C2C
|
||||
)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
@@ -2553,7 +2598,10 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
|
||||
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
|
||||
{
|
||||
// See comment in page_tree_set_location
|
||||
return uvm_gpu_is_virt_mode_sriov_heavy(gpu)? UVM_APERTURE_VID : UVM_APERTURE_DEFAULT;
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
}
|
||||
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
|
||||
@@ -2964,9 +3012,6 @@ NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -3005,9 +3050,6 @@ NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -3080,41 +3122,41 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_addr = dma_map_page(&gpu->parent->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&gpu->parent->pci_dev->dev, dma_addr))
|
||||
dma_addr = dma_map_page(&parent_gpu->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&parent_gpu->pci_dev->dev, dma_addr))
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
if (dma_addr < gpu->parent->dma_addressable_start ||
|
||||
dma_addr + size - 1 > gpu->parent->dma_addressable_limit) {
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_addr < parent_gpu->dma_addressable_start ||
|
||||
dma_addr + size - 1 > parent_gpu->dma_addressable_limit) {
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
UVM_ERR_PRINT_RL("PCI mapped range [0x%llx, 0x%llx) not in the addressable range [0x%llx, 0x%llx), GPU %s\n",
|
||||
dma_addr,
|
||||
dma_addr + (NvU64)size,
|
||||
gpu->parent->dma_addressable_start,
|
||||
gpu->parent->dma_addressable_limit + 1,
|
||||
uvm_gpu_name(gpu));
|
||||
parent_gpu->dma_addressable_start,
|
||||
parent_gpu->dma_addressable_limit + 1,
|
||||
parent_gpu->name);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
atomic64_add(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(gpu->parent, dma_addr);
|
||||
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size)
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
|
||||
{
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_address = gpu_addr_to_dma_addr(gpu->parent, dma_address);
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
// This function implements the UvmRegisterGpu API call, as described in uvm.h.
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "uvm_va_block_types.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_rb_tree.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "nv-kthread-q.h"
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
@@ -159,6 +160,12 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// State used by the VA block routines called by the servicing routine
|
||||
uvm_va_block_context_t block_context;
|
||||
|
||||
// Prefetch state hint
|
||||
uvm_perf_prefetch_hint_t prefetch_hint;
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
};
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
@@ -374,6 +381,17 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// determine at fetch time that all the access counter notifications in the
|
||||
// batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Scratch space, used to generate artificial physically addressed notifications.
|
||||
// Virtual address notifications are always aligned to 64k. This means up to 16
|
||||
// different physical locations could have been accessed to trigger one notification.
|
||||
// The sub-granularity mask can correspond to any of them.
|
||||
struct
|
||||
{
|
||||
uvm_processor_id_t resident_processors[16];
|
||||
uvm_gpu_phys_address_t phys_addresses[16];
|
||||
uvm_access_counter_buffer_entry_t phys_entry;
|
||||
} scratch;
|
||||
} virt;
|
||||
|
||||
struct
|
||||
@@ -506,7 +524,6 @@ typedef enum
|
||||
UVM_GPU_LINK_NVLINK_2,
|
||||
UVM_GPU_LINK_NVLINK_3,
|
||||
UVM_GPU_LINK_NVLINK_4,
|
||||
UVM_GPU_LINK_C2C,
|
||||
UVM_GPU_LINK_MAX
|
||||
} uvm_gpu_link_type_t;
|
||||
|
||||
@@ -940,6 +957,10 @@ struct uvm_parent_gpu_struct
|
||||
// NUMA info, mainly for ATS
|
||||
uvm_numa_info_t numa_info;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
|
||||
// Access counter buffer info. This is only valid if supports_access_counters is set to true
|
||||
uvm_access_counter_buffer_info_t access_counter_buffer_info;
|
||||
|
||||
@@ -1103,7 +1124,8 @@ struct uvm_gpu_peer_struct
|
||||
// deletion.
|
||||
NvHandle p2p_handle;
|
||||
|
||||
struct {
|
||||
struct
|
||||
{
|
||||
struct proc_dir_entry *peer_file[2];
|
||||
struct proc_dir_entry *peer_symlink_file[2];
|
||||
|
||||
@@ -1309,19 +1331,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
//
|
||||
// Returns the physical address of the pages that can be used to access them on
|
||||
// the GPU.
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
|
||||
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size);
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
|
||||
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
|
||||
{
|
||||
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
|
||||
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
|
||||
}
|
||||
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_gpu_t *gpu, NvU64 dma_address)
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
|
||||
{
|
||||
uvm_gpu_unmap_cpu_pages(gpu, dma_address, PAGE_SIZE);
|
||||
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Allocate and map a page of system DMA memory on the GPU for physical access
|
||||
@@ -1347,6 +1369,16 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
// Returns whether the given range is within the GPU's addressable VA ranges in
|
||||
// the internal GPU VA "kernel" address space, which is a linear address space.
|
||||
// Therefore, the input 'addr' must not be in canonical form, even platforms
|
||||
// that use to the canonical form addresses, i.e., ARM64, and x86.
|
||||
// Warning: This only checks whether the GPU's MMU can support the given
|
||||
// address. Some HW units on that GPU might only support a smaller range.
|
||||
//
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
// Returns addr's canonical form for host systems that use canonical form
|
||||
// addresses.
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -41,6 +41,10 @@
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
// granularity can be set to up to 16G, which would require an array too large
|
||||
@@ -934,25 +938,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
translate_virt_notifications_instance_ptrs(gpu, batch_context);
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
// TODO: Bug 1990466: Service virtual notifications. Entries with NULL
|
||||
// va_space are simply dropped.
|
||||
if (uvm_enable_builtin_tests) {
|
||||
NvU32 i;
|
||||
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
const bool on_managed = false;
|
||||
uvm_tools_broadcast_access_counter(gpu, batch_context->virt.notifications[i], on_managed);
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// GPA notifications provide a physical address and an aperture. Sort
|
||||
// accesses by aperture to try to coalesce operations on the same target
|
||||
// processor.
|
||||
@@ -1046,9 +1031,19 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
}

// If the underlying VMA is gone, skip HMM migrations.
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_vma(&service_context->block_context, address);
if (status == NV_ERR_INVALID_ADDRESS)
continue;

UVM_ASSERT(status == NV_OK);
}

service_context->block_context.policy = uvm_va_policy_get(va_block, address);

new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
processor,
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
@@ -1158,7 +1153,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
size_t index;
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
@@ -1168,7 +1163,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
gpu->id: UVM_ID_CPU;

*clear_counter = false;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

UVM_ASSERT(num_reverse_mappings > 0);

@@ -1217,7 +1212,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
uvm_mutex_unlock(&va_block->lock);

if (status == NV_OK)
*clear_counter = true;
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
}

done:
@@ -1238,25 +1233,26 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
size_t index;

*clear_counter = false;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

for (index = 0; index < num_reverse_mappings; ++index) {
bool clear_counter_local = false;
unsigned out_flags_local = 0;
status = service_phys_single_va_block(gpu,
batch_context,
current_entry,
reverse_mappings + index,
1,
&clear_counter_local);
&out_flags_local);
if (status != NV_OK)
break;

*clear_counter = *clear_counter || clear_counter_local;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
*out_flags |= out_flags_local;
}

// In the case of failure, drop the refcounts for the remaining reverse mappings
@@ -1267,18 +1263,13 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
}

// Iterate over all regions set in the given sub_granularity mask
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) \
for ((region_start) = find_first_bit(&(sub_granularity), (config)->sub_granularity_regions_per_translation), \
(region_end) = find_next_zero_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_start) + 1); \
(region_start) < config->sub_granularity_regions_per_translation; \
(region_start) = find_next_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_start) + 1))
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1); \
(region_start) < (num_regions); \
(region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))
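
A minimal usage sketch of the rewritten macro, assuming a 32-region mask (process_region() is a placeholder, not a driver function):

// Walk the set regions of a sub-granularity mask. With the example mask the
// loop yields the runs [0, 4) and [12, 16).
unsigned long sub_granularity = 0x0000f00f;
NvU32 region_start, region_end;
const NvU32 num_regions = 32;

for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
    // Each iteration covers one contiguous run of set bits.
    process_region(region_start, region_end);
}
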
static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
{
@@ -1309,7 +1300,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
NvU64 address,
unsigned long sub_granularity,
size_t *num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
NV_STATUS status;
NvU32 region_start, region_end;
@@ -1318,7 +1309,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,

// Get the reverse_map translations for all the regions set in the
// sub_granularity field of the counter.
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) {
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
@@ -1350,7 +1341,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
clear_counter);
out_flags);
}
else {
status = service_phys_va_blocks(gpu,
@@ -1358,7 +1349,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
clear_counter);
out_flags);
}

return status;
@@ -1366,7 +1357,8 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,

static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry)
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NvU64 address;
NvU64 translation_index;
@@ -1377,7 +1369,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
size_t total_reverse_mappings = 0;
uvm_gpu_t *resident_gpu = NULL;
NV_STATUS status = NV_OK;
bool clear_counter = false;
unsigned flags = 0;

address = current_entry->address.address;
UVM_ASSERT(address % config->translation_size == 0);
@@ -1405,7 +1397,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,

for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
size_t num_reverse_mappings;
bool clear_counter_local = false;
unsigned out_flags_local = 0;
status = service_phys_notification_translation(gpu,
resident_gpu,
batch_context,
@@ -1414,9 +1406,11 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
address,
sub_granularity,
&num_reverse_mappings,
&clear_counter_local);
&out_flags_local);
total_reverse_mappings += num_reverse_mappings;
clear_counter = clear_counter || clear_counter_local;

UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
flags |= out_flags_local;

if (status != NV_OK)
break;
@@ -1425,17 +1419,14 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
}

// TODO: Bug 1990466: Here we already have virtual addresses and
// address spaces. Merge virtual and physical notification handling

// Currently we only report events for our tests, not for tools
if (uvm_enable_builtin_tests) {
const bool on_managed = total_reverse_mappings != 0;
uvm_tools_broadcast_access_counter(gpu, current_entry, on_managed);
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
}

if (status == NV_OK && clear_counter)
status = access_counter_clear_targeted(gpu, current_entry);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;

return status;
}
@@ -1450,11 +1441,18 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
NV_STATUS status;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
unsigned flags = 0;

if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
continue;

status = service_phys_notification(gpu, batch_context, current_entry);
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);

if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);

if (status != NV_OK)
return status;
}
@@ -1462,6 +1460,191 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
return NV_OK;
}

static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
{
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
*(uvm_gpu_phys_address_t*)_b);
}

static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
{
if (a.aperture != b.aperture)
return false;

UVM_ASSERT(is_power_of_2(granularity));

return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
}
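
As a quick illustration (sketch only, values chosen for the example): with the 64K notification granularity, two addresses in the same 64K-aligned window compare equal as long as their apertures match.

uvm_gpu_phys_address_t a = { .aperture = UVM_APERTURE_SYS, .address = 0x10000 };
uvm_gpu_phys_address_t b = { .aperture = UVM_APERTURE_SYS, .address = 0x1ffff };

// Both addresses align down to 0x10000 with a 64K granularity:
UVM_ASSERT(gpu_phys_same_region(a, b, UVM_PAGE_SIZE_64K));
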
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
NvU64 region_size,
NvU64 sub_region_size,
NvU32 accessed_mask)
{
const unsigned accessed_index = (address.address % region_size) / sub_region_size;

// accessed_mask is only filled for tracking granularities larger than 64K
if (region_size == UVM_PAGE_SIZE_64K)
return true;

UVM_ASSERT(accessed_index < 32);
return ((1 << accessed_index) & accessed_mask) != 0;
}
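
A worked example as a sketch (the sizes and mask below are assumptions for illustration): with a 2M tracking region split into 64K sub-regions, an access at offset 0x30000 falls in sub-region 3, so it only counts as accessed if bit 3 of the notification's sub_granularity mask is set.

uvm_gpu_phys_address_t addr = { .aperture = UVM_APERTURE_SYS, .address = 0x230000 };
NvU64 region_size = 2 * 1024 * 1024;   // 2M tracking granularity (example)
NvU64 sub_region_size = 64 * 1024;     // 2M split into 32 sub-regions of 64K
NvU32 accessed_mask = 0x8;             // only sub-region 3 was reported accessed

// (0x230000 % 2M) / 64K == 3, and bit 3 is set in the mask:
UVM_ASSERT(phys_address_in_accessed_sub_region(addr, region_size, sub_region_size, accessed_mask));
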
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
NvU64 notification_size;
NvU64 address;
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
int num_addresses = 0;
int i;

// Virtual address notifications are always 64K aligned
NvU64 region_start = current_entry->address.address;
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;

uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_type_t counter_type = current_entry->counter_type;

const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);

uvm_va_space_t *va_space = current_entry->virtual_info.va_space;

UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);

// Entries with NULL va_space are simply dropped.
if (!va_space)
return NV_OK;

status = config_granularity_to_bytes(config->rm.granularity, &notification_size);
if (status != NV_OK)
return status;

// Collect physical locations that could have been touched
// in the reported 64K VA region. The notification mask can
// correspond to any of them.
uvm_va_space_down_read(va_space);
for (address = region_start; address < region_end;) {
uvm_va_block_t *va_block;

NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
address += PAGE_SIZE;
continue;
}

uvm_mutex_lock(&va_block->lock);
while (address < va_block->end && address < region_end) {
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);

// UVM va_block always maps the closest resident location to processor
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);

// Add physical location if it's valid and not local vidmem
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
if (phys_address_in_accessed_sub_region(phys_address,
notification_size,
config->sub_granularity_region_size,
current_entry->sub_granularity)) {
resident_processors[num_addresses] = res_id;
phys_addresses[num_addresses] = phys_address;
++num_addresses;
}
else {
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
phys_address.address,
uvm_aperture_string(phys_address.aperture),
current_entry->sub_granularity);
}
}

address += PAGE_SIZE;
}
uvm_mutex_unlock(&va_block->lock);
}
uvm_va_space_up_read(va_space);

// The addresses need to be sorted to aid coalescing.
sort(phys_addresses,
num_addresses,
sizeof(*phys_addresses),
cmp_sort_gpu_phys_addr,
NULL);

for (i = 0; i < num_addresses; ++i) {
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;

// Skip the current pointer if the physical region was already handled
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
continue;
}
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
i,
num_addresses,
phys_addresses[i].address,
notification_size - 1,
uvm_aperture_string(phys_addresses[i].aperture),
uvm_gpu_name(gpu));

// Construct a fake phys addr AC entry
fake_entry->counter_type = current_entry->counter_type;
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
fake_entry->address.aperture = phys_addresses[i].aperture;
fake_entry->address.is_virtual = false;
fake_entry->physical_info.resident_id = resident_processors[i];
fake_entry->counter_value = current_entry->counter_value;
fake_entry->sub_granularity = current_entry->sub_granularity;

status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
if (status != NV_OK)
break;
}

return status;
}
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
NV_STATUS status = NV_OK;
preprocess_virt_notifications(gpu, batch_context);

for (i = 0; i < batch_context->virt.num_notifications; ++i) {
unsigned flags = 0;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];

status = service_virt_notification(gpu, batch_context, current_entry, &flags);

UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
i + 1,
batch_context->virt.num_notifications,
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
status,
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");

if (uvm_enable_builtin_tests)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);

if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);

if (status != NV_OK)
break;
}

return status;
}

void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
@@ -1535,7 +1718,7 @@ bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
}

NV_STATUS uvm_perf_access_counters_init()
NV_STATUS uvm_perf_access_counters_init(void)
{
uvm_perf_module_init("perf_access_counters",
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
@@ -1546,7 +1729,7 @@ NV_STATUS uvm_perf_access_counters_init()
return NV_OK;
}

void uvm_perf_access_counters_exit()
void uvm_perf_access_counters_exit(void)
{
}

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -338,7 +338,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_range_t *va_range = va_block->va_range;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;

UVM_ASSERT(!fault_entry->is_fatal);
@@ -365,8 +364,11 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
}

// Check logical permissions
status = uvm_va_range_check_logical_permissions(va_range,
status = uvm_va_block_check_logical_permissions(va_block,
&service_context->block_context,
gpu->id,
uvm_va_block_cpu_page_index(va_block,
fault_entry->fault_address),
fault_entry->fault_access_type,
uvm_range_group_address_migratable(va_space,
fault_entry->fault_address));
@@ -386,6 +388,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,

// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
gpu->id,
fault_entry->access_type_mask,
@@ -422,7 +425,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
}

static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_fault_buffer_entry_t *fault_entry)
{
@@ -432,7 +434,6 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,

service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
service_context->block_context.mm = mm;

uvm_mutex_lock(&va_block->lock);

@@ -458,8 +459,6 @@ static void kill_channel_delayed(void *_user_channel)
uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
uvm_va_space_t *va_space = user_channel->kill_channel.va_space;

UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);

uvm_va_space_down_read_rm(va_space);
if (user_channel->gpu_va_space) {
// RM handles the fault, which will do the correct fault reporting in the
@@ -598,6 +597,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
va_block_context->mm = mm;

uvm_va_space_down_read(va_space);

@@ -622,12 +622,11 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e

if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
mm,
fault_entry->fault_address,
va_block_context,
&va_block);
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
else
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -1034,6 +1034,61 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
return NV_OK;
}

static bool check_fault_entry_duplicate(const uvm_fault_buffer_entry_t *current_entry,
const uvm_fault_buffer_entry_t *previous_entry)
{
bool is_duplicate = false;

if (previous_entry) {
is_duplicate = (current_entry->va_space == previous_entry->va_space) &&
(current_entry->fault_address == previous_entry->fault_address);
}

return is_duplicate;
}

static void fault_entry_duplicate_flags(uvm_fault_buffer_entry_t *current_entry,
const uvm_fault_buffer_entry_t *previous_entry)
{
UVM_ASSERT(previous_entry);
UVM_ASSERT(check_fault_entry_duplicate(current_entry, previous_entry));

// Propagate the is_invalid_prefetch flag across all prefetch faults
// on the page
if (previous_entry->is_invalid_prefetch)
current_entry->is_invalid_prefetch = true;

// If a page is throttled, all faults on the page must be skipped
if (previous_entry->is_throttled)
current_entry->is_throttled = true;
}

static void update_batch_context(uvm_fault_service_batch_context_t *batch_context,
uvm_fault_buffer_entry_t *current_entry,
const uvm_fault_buffer_entry_t *previous_entry)
{
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];

UVM_ASSERT(utlb->num_pending_faults > 0);

if (is_duplicate)
batch_context->num_duplicate_faults += current_entry->num_instances;
else
batch_context->num_duplicate_faults += current_entry->num_instances - 1;

if (current_entry->is_invalid_prefetch)
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;

if (current_entry->is_fatal) {
utlb->has_fatal_faults = true;
batch_context->has_fatal_faults = true;
}

if (current_entry->is_throttled)
batch_context->has_throttled_faults = true;
}
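
A minimal sketch of how these three helpers are meant to compose in a servicing loop (the surrounding loop is elided):

uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
const uvm_fault_buffer_entry_t *previous_entry = i > 0 ? batch_context->ordered_fault_cache[i - 1] : NULL;
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);

// Duplicates inherit the prefetch/throttle state of the fault already serviced
// for the same page, then every entry updates the batch-wide counters once.
if (is_duplicate)
    fault_entry_duplicate_flags(current_entry, previous_entry);

update_batch_context(batch_context, current_entry, previous_entry);
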
// This function computes the maximum access type that can be serviced for the
// reported fault instances given the logical permissions of the VA range. If
// none of the fault instances can be serviced UVM_FAULT_ACCESS_TYPE_COUNT is
@@ -1055,13 +1110,17 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
// - service_access_type: highest access type that can be serviced.
static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_fault_buffer_entry_t *fault_entry,
bool allow_migration)
{
NV_STATUS perm_status;

perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
perm_status = uvm_va_block_check_logical_permissions(va_block,
va_block_context,
gpu->id,
uvm_va_block_cpu_page_index(va_block,
fault_entry->fault_address),
fault_entry->fault_access_type,
allow_migration);
if (perm_status == NV_OK)
@@ -1083,8 +1142,11 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
// service them before we can cancel the write/atomic faults. So we
// retry with read fault access type.
if (uvm_fault_access_type_mask_test(fault_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
perm_status = uvm_va_block_check_logical_permissions(va_block,
va_block_context,
gpu->id,
uvm_va_block_cpu_page_index(va_block,
fault_entry->fault_address),
UVM_FAULT_ACCESS_TYPE_READ,
allow_migration);
if (perm_status == NV_OK)
@@ -1115,12 +1177,12 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
// - NV_ERR_MORE_PROCESSING_REQUIRED if servicing needs allocation retry
// - NV_ERR_NO_MEMORY if the faults could not be serviced due to OOM
// - Any other value is a UVM-global error
static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
NvU32 first_fault_index,
uvm_fault_service_batch_context_t *batch_context,
NvU32 *block_faults)
static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
NvU32 *block_faults)
{
NV_STATUS status = NV_OK;
NvU32 i;
@@ -1156,14 +1218,16 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address >= va_block->start);
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);

end = va_block->end;
if (uvm_va_block_is_hmm(va_block))
if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_find_policy_end(va_block,
&block_context->block_context,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
else
}
else {
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
end = va_block->end;
}

// Scan the sorted array and notify the fault event for all fault entries
// in the block
@@ -1191,7 +1255,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
if (i > first_fault_index) {
previous_entry = ordered_fault_cache[i - 1];
is_duplicate = current_entry->fault_address == previous_entry->fault_address;
is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
}

if (block_context->num_retries == 0) {
@@ -1206,12 +1270,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,

// Service the most intrusive fault per page, only. Waive the rest
if (is_duplicate) {
// Propagate the is_invalid_prefetch flag across all prefetch
// faults on the page
current_entry->is_invalid_prefetch = previous_entry->is_invalid_prefetch;

// If a page is throttled, all faults on the page must be skipped
current_entry->is_throttled = previous_entry->is_throttled;
fault_entry_duplicate_flags(current_entry, previous_entry);

// The previous fault was non-fatal so the page has been already
// serviced
@@ -1226,7 +1285,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,

UVM_ASSERT(iter.start <= current_entry->fault_address && iter.end >= current_entry->fault_address);

service_access_type = check_fault_access_permissions(gpu, va_block, current_entry, iter.migratable);
service_access_type = check_fault_access_permissions(gpu,
va_block,
&block_context->block_context,
current_entry,
iter.migratable);

// Do not exit early due to logical errors such as access permission
// violation.
@@ -1269,6 +1332,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,

// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
&block_context->block_context,
page_index,
gpu->id,
service_access_type_mask,
@@ -1302,25 +1366,8 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
// Only update counters the first time since logical permissions cannot
// change while we hold the VA space lock
// TODO: Bug 1750144: That might not be true with HMM.
if (block_context->num_retries == 0) {
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];

if (current_entry->is_invalid_prefetch)
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;

if (is_duplicate)
batch_context->num_duplicate_faults += current_entry->num_instances;
else
batch_context->num_duplicate_faults += current_entry->num_instances - 1;

if (current_entry->is_throttled)
batch_context->has_throttled_faults = true;

if (current_entry->is_fatal) {
utlb->has_fatal_faults = true;
batch_context->has_fatal_faults = true;
}
}
if (block_context->num_retries == 0)
update_batch_context(batch_context, current_entry, previous_entry);
}

// Apply the changes computed in the fault service block context, if there
@@ -1347,12 +1394,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
//
// See the comments for function service_fault_batch_block_locked for
// implementation details and error codes.
static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
struct mm_struct *mm,
uvm_va_block_t *va_block,
NvU32 first_fault_index,
uvm_fault_service_batch_context_t *batch_context,
NvU32 *block_faults)
static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
NvU32 *block_faults)
{
NV_STATUS status;
uvm_va_block_retry_t va_block_retry;
@@ -1361,17 +1407,16 @@ static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,

fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
fault_block_context->num_retries = 0;
fault_block_context->block_context.mm = mm;

uvm_mutex_lock(&va_block->lock);

status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
service_batch_managed_faults_in_block_locked(gpu,
va_block,
&va_block_retry,
first_fault_index,
batch_context,
block_faults));
service_fault_batch_block_locked(gpu,
va_block,
&va_block_retry,
batch_context,
first_fault_index,
block_faults));

tracker_status = uvm_tracker_add_tracker_safe(&batch_context->tracker, &va_block->tracker);

@@ -1390,94 +1435,128 @@ typedef enum
FAULT_SERVICE_MODE_CANCEL,
} fault_service_mode_t;

static NV_STATUS service_non_managed_fault(uvm_fault_buffer_entry_t *current_entry,
const uvm_fault_buffer_entry_t *previous_entry,
NV_STATUS lookup_status,
uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_fault_service_batch_context_t *batch_context,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_fault_utlb_info_t *utlb)
static NV_STATUS service_fault_batch_ats(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
NvU32 *block_faults)
{
NV_STATUS status = lookup_status;
bool is_duplicate = false;
UVM_ASSERT(utlb->num_pending_faults > 0);
UVM_ASSERT(lookup_status != NV_OK);
NV_STATUS status;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
const uvm_fault_buffer_entry_t *previous_entry = first_fault_index > 0 ?
batch_context->ordered_fault_cache[first_fault_index - 1] : NULL;
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
if (previous_entry) {
is_duplicate = (current_entry->va_space == previous_entry->va_space) &&
(current_entry->fault_address == previous_entry->fault_address);

if (is_duplicate) {
// Propagate the is_invalid_prefetch flag across all prefetch faults
// on the page
if (previous_entry->is_invalid_prefetch)
current_entry->is_invalid_prefetch = true;

// If a page is throttled, all faults on the page must be skipped
if (previous_entry->is_throttled)
current_entry->is_throttled = true;
}
}
if (is_duplicate)
fault_entry_duplicate_flags(current_entry, previous_entry);

// Generate fault events for all fault packets
uvm_perf_event_notify_gpu_fault(&current_entry->va_space->perf_events,
NULL,
gpu_va_space->gpu->id,
gpu->id,
UVM_ID_INVALID,
current_entry,
batch_context->batch_id,
is_duplicate);

if (status != NV_ERR_INVALID_ADDRESS)
return status;
// The VA isn't managed. See if ATS knows about it, unless it is a
// duplicate and the previous fault was non-fatal so the page has
// already been serviced
//
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
// don't do an unconditional VA range lookup for every ATS fault.
if (!is_duplicate || previous_entry->is_fatal)
status = uvm_ats_service_fault_entry(gpu_va_space, current_entry, ats_invalidate);
else
status = NV_OK;

if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
// The VA isn't managed. See if ATS knows about it, unless it is a
// duplicate and the previous fault was non-fatal so the page has
// already been serviced
if (!is_duplicate || previous_entry->is_fatal)
status = uvm_ats_service_fault_entry(gpu_va_space, current_entry, ats_invalidate);
else
status = NV_OK;
(*block_faults)++;

update_batch_context(batch_context, current_entry, previous_entry);

return status;
}

static void service_fault_batch_fatal(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
NV_STATUS status,
NvU32 *block_faults)
{
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
const uvm_fault_buffer_entry_t *previous_entry = first_fault_index > 0 ?
batch_context->ordered_fault_cache[first_fault_index - 1] : NULL;
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);

if (is_duplicate)
fault_entry_duplicate_flags(current_entry, previous_entry);

// The VA block cannot be found, set the fatal fault flag,
// unless it is a prefetch fault
if (current_entry->fault_access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) {
current_entry->is_invalid_prefetch = true;
}
else {
// If the VA block cannot be found, set the fatal fault flag,
// unless it is a prefetch fault
if (current_entry->fault_access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) {
current_entry->is_invalid_prefetch = true;
}
else {
current_entry->is_fatal = true;
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
current_entry->is_fatal = true;
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}

update_batch_context(batch_context, current_entry, previous_entry);

uvm_perf_event_notify_gpu_fault(&current_entry->va_space->perf_events,
NULL,
gpu->id,
UVM_ID_INVALID,
current_entry,
batch_context->batch_id,
is_duplicate);

(*block_faults)++;
}

static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
NvU32 *block_faults)
{
NV_STATUS status;
uvm_va_range_t *va_range;
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;

(*block_faults) = 0;

va_range = uvm_va_range_find(va_space, fault_address);
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, va_block_context, &va_block);
if (status == NV_OK) {
status = service_fault_batch_block(gpu, va_block, batch_context, first_fault_index, block_faults);
}
else if ((status == NV_ERR_INVALID_ADDRESS) && uvm_ats_can_service_faults(gpu_va_space, mm)) {
status = service_fault_batch_ats(gpu_va_space, mm, batch_context, first_fault_index, block_faults);
}
else {
service_fault_batch_fatal(gpu_va_space->gpu, batch_context, first_fault_index, status, block_faults);

// Do not fail due to logical errors
status = NV_OK;
}

if (is_duplicate)
batch_context->num_duplicate_faults += current_entry->num_instances;
else
batch_context->num_duplicate_faults += current_entry->num_instances - 1;

if (current_entry->is_invalid_prefetch)
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;

if (current_entry->is_fatal) {
utlb->has_fatal_faults = true;
batch_context->has_fatal_faults = true;
}

if (current_entry->is_throttled)
batch_context->has_throttled_faults = true;

return status;
}
// Scan the ordered view of faults and group them by different va_blocks.
// Service faults for each va_block, in batch.
// Scan the ordered view of faults and group them by different va_blocks
// (managed faults) and service faults for each va_block, in batch.
// Service non-managed faults one at a time as they are encountered during the
// scan.
//
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
@@ -1491,9 +1570,9 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
uvm_va_space_t *va_space = NULL;
uvm_gpu_va_space_t *gpu_va_space = NULL;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
struct mm_struct *mm = NULL;
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
struct mm_struct *mm = NULL;
uvm_va_block_context_t *va_block_context =
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;

@@ -1502,7 +1581,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
ats_invalidate->write_faults_in_batch = false;

for (i = 0; i < batch_context->num_coalesced_faults;) {
uvm_va_block_t *va_block;
NvU32 block_faults;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
@@ -1531,18 +1609,16 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
va_block_context->mm = mm;

uvm_va_space_down_read(va_space);

gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (gpu_va_space && gpu_va_space->needs_fault_buffer_flush) {
// flush if required and clear the flush flag
if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
gpu_va_space->needs_fault_buffer_flush = false;

if (status == NV_OK)
status = NV_WARN_MORE_PROCESSING_REQUIRED;

@@ -1573,51 +1649,22 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
continue;
}

// TODO: Bug 2103669: Service more than one ATS fault at a time so we
// don't do an unconditional VA range lookup for every ATS fault.
status = uvm_va_block_find_create(va_space,
mm,
current_entry->fault_address,
va_block_context,
&va_block);
if (status == NV_OK) {
status = service_batch_managed_faults_in_block(gpu_va_space->gpu,
mm,
va_block,
i,
batch_context,
&block_faults);

// When service_batch_managed_faults_in_block returns != NV_OK
// something really bad happened
if (status != NV_OK)
goto fail;

i += block_faults;
}
else {
const uvm_fault_buffer_entry_t *previous_entry = i == 0? NULL : batch_context->ordered_fault_cache[i - 1];

status = service_non_managed_fault(current_entry,
previous_entry,
status,
gpu_va_space,
mm,
batch_context,
ats_invalidate,
utlb);

// When service_non_managed_fault returns != NV_OK something really
// bad happened
if (status != NV_OK)
goto fail;

++i;
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults);
// TODO: Bug 3900733: clean up locking in service_fault_batch().
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
mm = NULL;
va_space = NULL;
continue;
}
if (status != NV_OK)
goto fail;

i += block_faults;

// Don't issue replays in cancel mode
if (replay_per_va_block) {
if (replay_per_va_block && !batch_context->has_fatal_faults) {
status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
if (status != NV_OK)
goto fail;

@@ -118,12 +118,20 @@ static bool is_canary(NvU32 val)
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}

// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
{
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_page_t *pool_page;
NvU32 *payloads;
size_t i;
uvm_rm_mem_type_t rm_mem_type = UVM_RM_MEM_TYPE_SYS;

uvm_assert_mutex_locked(&pool->mutex);

@@ -135,13 +143,16 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
pool_page->pool = pool;

status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
rm_mem_type,
UVM_SEMAPHORE_PAGE_SIZE,
0,
&pool_page->memory);
if (status != NV_OK)
goto error;

// Verify the GPU can access the semaphore pool.
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));

// All semaphores are initially free
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,6 +44,10 @@
#include "clc86f.h"
#include "clc8b5.h"

static int uvm_downgrade_force_membar_sys = 1;
module_param(uvm_downgrade_force_membar_sys, uint, 0644);
MODULE_PARM_DESC(uvm_downgrade_force_membar_sys, "Force all TLB invalidation downgrades to use MEMBAR_SYS");

#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
#define ARCH_OP_COUNT (sizeof(uvm_arch_hal_t) / sizeof(void *))
@@ -61,7 +65,7 @@ static uvm_hal_class_ops_t ce_table[] =
.id = MAXWELL_DMA_COPY_A,
.u.ce_ops = {
.init = uvm_hal_maxwell_ce_init,
.method_validate = uvm_hal_method_validate_stub,
.method_is_valid = uvm_hal_method_is_valid_stub,
.semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
@@ -69,11 +73,11 @@ static uvm_hal_class_ops_t ce_table[] =
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
.memcopy = uvm_hal_maxwell_ce_memcopy,
.memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
.memset_validate = uvm_hal_ce_memset_validate_stub,
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
.memset_1 = uvm_hal_maxwell_ce_memset_1,
.memset_4 = uvm_hal_maxwell_ce_memset_4,
.memset_8 = uvm_hal_maxwell_ce_memset_8,
@@ -99,7 +103,15 @@ static uvm_hal_class_ops_t ce_table[] =
{
.id = VOLTA_DMA_COPY_A,
.parent_id = PASCAL_DMA_COPY_B,
.u.ce_ops = {},
.u.ce_ops = {
.semaphore_release = uvm_hal_volta_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_volta_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_volta_ce_semaphore_reduction_inc,
.memcopy = uvm_hal_volta_ce_memcopy,
.memset_1 = uvm_hal_volta_ce_memset_1,
.memset_4 = uvm_hal_volta_ce_memset_4,
.memset_8 = uvm_hal_volta_ce_memset_8,
},
},
{
.id = TURING_DMA_COPY_A,
@@ -110,22 +122,22 @@ static uvm_hal_class_ops_t ce_table[] =
.id = AMPERE_DMA_COPY_A,
.parent_id = TURING_DMA_COPY_A,
.u.ce_ops = {
.method_validate = uvm_hal_ampere_ce_method_validate_c6b5,
.method_is_valid = uvm_hal_ampere_ce_method_is_valid_c6b5,
.phys_mode = uvm_hal_ampere_ce_phys_mode,
.memcopy_validate = uvm_hal_ampere_ce_memcopy_validate_c6b5,
.memcopy_is_valid = uvm_hal_ampere_ce_memcopy_is_valid_c6b5,
.memcopy_patch_src = uvm_hal_ampere_ce_memcopy_patch_src_c6b5,
.memset_validate = uvm_hal_ampere_ce_memset_validate_c6b5,
.memset_is_valid = uvm_hal_ampere_ce_memset_is_valid_c6b5,
},
},
{
.id = AMPERE_DMA_COPY_B,
.parent_id = AMPERE_DMA_COPY_A,
.u.ce_ops = {
.method_validate = uvm_hal_method_validate_stub,
.method_is_valid = uvm_hal_method_is_valid_stub,
.plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
.memset_validate = uvm_hal_ce_memset_validate_stub,
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
},
},
{
@@ -140,6 +152,8 @@ static uvm_hal_class_ops_t ce_table[] =
.memset_1 = uvm_hal_hopper_ce_memset_1,
.memset_4 = uvm_hal_hopper_ce_memset_4,
.memset_8 = uvm_hal_hopper_ce_memset_8,
.memcopy_is_valid = uvm_hal_hopper_ce_memcopy_is_valid,
.memset_is_valid = uvm_hal_hopper_ce_memset_is_valid,
},
},
};
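
The tables rely on parent_id inheritance: an entry only names the operations it overrides, and anything left NULL is filled in from its parent class. A rough sketch of that resolution, assuming the ops structs are plain arrays of function pointers (resolve_ce_op is an illustrative helper, not the driver's initialization code):

// Walk the parent chain until some class provides the requested operation.
// table/count describe one of the *_table[] arrays above; op_index selects a
// function pointer inside u.ce_ops.
static void *resolve_ce_op(uvm_hal_class_ops_t *table, size_t count, NvU32 id, size_t op_index)
{
    size_t i;

    for (i = 0; i < count; ++i) {
        if (table[i].id != id)
            continue;

        void **ops = (void **)&table[i].u.ce_ops;
        if (ops[op_index] != NULL)
            return ops[op_index];

        // Not overridden by this class: fall back to its parent.
        return resolve_ce_op(table, count, table[i].parent_id, op_index);
    }

    return NULL;
}
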
@@ -152,8 +166,8 @@ static uvm_hal_class_ops_t host_table[] =
.id = KEPLER_CHANNEL_GPFIFO_B,
.u.host_ops = {
.init = uvm_hal_maxwell_host_init_noop,
.method_validate = uvm_hal_method_validate_stub,
.sw_method_validate = uvm_hal_method_validate_stub,
.method_is_valid = uvm_hal_method_is_valid_stub,
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
.wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
.membar_sys = uvm_hal_maxwell_host_membar_sys,
// No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
@@ -235,8 +249,8 @@ static uvm_hal_class_ops_t host_table[] =
.id = AMPERE_CHANNEL_GPFIFO_A,
.parent_id = TURING_CHANNEL_GPFIFO_A,
.u.host_ops = {
.method_validate = uvm_hal_ampere_host_method_validate,
.sw_method_validate = uvm_hal_ampere_host_sw_method_validate,
.method_is_valid = uvm_hal_ampere_host_method_is_valid,
.sw_method_is_valid = uvm_hal_ampere_host_sw_method_is_valid,
.clear_faulted_channel_sw_method = uvm_hal_ampere_host_clear_faulted_channel_sw_method,
.clear_faulted_channel_register = uvm_hal_ampere_host_clear_faulted_channel_register,
.tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
@@ -248,8 +262,8 @@ static uvm_hal_class_ops_t host_table[] =
.id = HOPPER_CHANNEL_GPFIFO_A,
.parent_id = AMPERE_CHANNEL_GPFIFO_A,
.u.host_ops = {
.method_validate = uvm_hal_method_validate_stub,
.sw_method_validate = uvm_hal_method_validate_stub,
.method_is_valid = uvm_hal_method_is_valid_stub,
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
.semaphore_acquire = uvm_hal_hopper_host_semaphore_acquire,
.semaphore_release = uvm_hal_hopper_host_semaphore_release,
.semaphore_timestamp = uvm_hal_hopper_host_semaphore_timestamp,
@@ -637,14 +651,20 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
}

static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
{
// Access counters are currently not supported in vGPU.
//
// TODO: Bug 200692962: Add support for access counters in vGPU
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
parent_gpu->access_counters_supported = false;
}

void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->arch_hal->init_properties(parent_gpu);

// Override the HAL when in non-passthrough virtualization
// TODO: Bug 200692962: [UVM] Add support for access counters in UVM on SR-IOV configurations
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
parent_gpu->access_counters_supported = false;
hal_override_properties(parent_gpu);
}

void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
@@ -663,6 +683,44 @@ void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
uvm_hal_membar(gpu, push, membar);
}

bool uvm_hal_membar_before_semaphore(uvm_push_t *push)
{
uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);

if (membar == UVM_MEMBAR_NONE) {
// No MEMBAR requested, don't use a flush.
return false;
}

if (membar == UVM_MEMBAR_GPU) {
// MEMBAR GPU requested, do it on the HOST and skip the engine flush as
// it doesn't have this capability.
uvm_hal_wfi_membar(push, UVM_MEMBAR_GPU);
return false;
}

// By default do a MEMBAR SYS and for that we can just use flush on the
// semaphore operation.
return true;
}
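
A sketch of how a HAL semaphore-release path is expected to use this helper (release_semaphore_with_flush/release_semaphore_without_flush are placeholders for the class-specific method encoding, not real driver functions):

// If true, the semaphore release itself carries the flush that orders prior
// writes (MEMBAR SYS case); if false, any required membar has already been
// pushed by the helper, so release the semaphore without a flush.
bool use_flush = uvm_hal_membar_before_semaphore(push);

if (use_flush)
    release_semaphore_with_flush(push, gpu_va, payload);
else
    release_semaphore_without_flush(push, gpu_va, payload);
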
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
{
// If the mapped memory was local, and we're not using a coherence protocol,
// we only need a GPU-local membar. This is because all accesses to this
// memory, including those from other processors like the CPU or peer GPUs,
// must come through this GPU's L2. In all current architectures, MEMBAR_GPU
// is sufficient to resolve ordering at the L2 level.
if (is_local_vidmem && !gpu->parent->numa_info.enabled && !uvm_downgrade_force_membar_sys)
return UVM_MEMBAR_GPU;

// If the mapped memory was remote, or if a coherence protocol can cache
// this GPU's memory, then there are external ways for other processors to
// access the memory without always going the local GPU L2, so we must use a
// MEMBAR_SYS.
return UVM_MEMBAR_SYS;
}
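
A usage sketch (illustrative; is_local_vidmem stands for whatever the caller knows about the unmapped pages): pick the membar once, then let uvm_hal_tlb_invalidate_membar() above issue it after the invalidate.

// 'true' here means the pages being unmapped live in this GPU's own vidmem.
bool is_local_vidmem = true;
uvm_membar_t membar = uvm_hal_downgrade_membar_type(gpu, is_local_vidmem);

// After the TLB invalidate has been pushed, issue the membar it needs:
uvm_hal_tlb_invalidate_membar(push, membar);
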
const char *uvm_aperture_string(uvm_aperture_t aperture)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
|
||||
@@ -823,12 +881,12 @@ void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_e
|
||||
UVM_DBG_PRINT(" tag %x\n", entry->tag);
|
||||
}
|
||||
|
||||
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -837,7 +895,7 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@

// A dummy method validation that always returns true; it can be used to skip
// CE/Host/SW method validations for a given architecture
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

typedef void (*uvm_hal_init_t)(uvm_push_t *push);
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
@@ -42,12 +42,12 @@ void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push);

// Host method validation
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
typedef bool (*uvm_hal_host_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// SW method validation
typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
typedef bool (*uvm_hal_host_sw_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// Wait for idle
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
@@ -208,6 +208,7 @@ typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU3
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_volta_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
@@ -220,6 +221,7 @@ void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
@@ -272,16 +274,17 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void);
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);

// CE method validation
typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// Memcopy validation.
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
@@ -296,6 +299,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_volta_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);

// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
@@ -303,11 +307,12 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s

// Memset validation.
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);

// Memset size bytes at dst to a given N-byte input value.
//
@@ -329,6 +334,10 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

void uvm_hal_volta_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_volta_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_volta_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);

void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
@@ -342,6 +351,7 @@ void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 v
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_volta_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

// Initialize GPU architecture dependent properties
@@ -579,8 +589,8 @@ void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_
struct uvm_host_hal_struct
{
uvm_hal_init_t init;
uvm_hal_host_method_validate method_validate;
uvm_hal_host_sw_method_validate sw_method_validate;
uvm_hal_host_method_is_valid method_is_valid;
uvm_hal_host_sw_method_is_valid sw_method_is_valid;
uvm_hal_wait_for_idle_t wait_for_idle;
uvm_hal_membar_sys_t membar_sys;
uvm_hal_membar_gpu_t membar_gpu;
@@ -612,18 +622,18 @@ struct uvm_host_hal_struct
struct uvm_ce_hal_struct
{
uvm_hal_init_t init;
uvm_hal_ce_method_validate method_validate;
uvm_hal_ce_method_is_valid method_is_valid;
uvm_hal_semaphore_release_t semaphore_release;
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_ce_offset_out_t offset_out;
uvm_hal_ce_offset_in_out_t offset_in_out;
uvm_hal_ce_phys_mode_t phys_mode;
uvm_hal_ce_plc_mode_t plc_mode;
uvm_hal_ce_memcopy_validate memcopy_validate;
uvm_hal_ce_memcopy_is_valid memcopy_is_valid;
uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
uvm_hal_memcopy_t memcopy;
uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
uvm_hal_ce_memset_validate memset_validate;
uvm_hal_ce_memset_is_valid memset_is_valid;
uvm_hal_memset_1_t memset_1;
uvm_hal_memset_4_t memset_4;
uvm_hal_memset_8_t memset_8;
@@ -726,4 +736,20 @@ static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
// appropriate Host membar(s) after a TLB invalidate.
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);

// Internal helper used by architectures/engines that don't support a FLUSH
// operation with a FLUSH_TYPE on the semaphore release method, e.g., pre-Volta
// CE. It inspects and clears the MEMBAR push flags, issues a Host WFI +
// membar.gpu for MEMBAR_GPU or returns true to indicate the caller to use the
// engine's FLUSH for MEMBAR_SYS.
bool uvm_hal_membar_before_semaphore(uvm_push_t *push);

// Determine the appropriate membar to use on TLB invalidates for GPU PTE
// permissions downgrades.
//
// gpu is the GPU on which the TLB invalidate is happening.
//
// is_local_vidmem indicates whether all mappings being invalidated pointed to
// the local GPU's memory.
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem);

#endif // __UVM_HAL_H__
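Given the struct members quoted above, an architecture's CE HAL table populates the renamed `*_is_valid` callbacks either with real validators or with the always-true stubs. A hypothetical partial initializer for illustration only; the actual tables live elsewhere in the driver and are not part of this diff:

```c
// Sketch: only the validation-related members are shown; all other fields of
// struct uvm_ce_hal_struct are omitted for brevity.
static struct uvm_ce_hal_struct ampere_ce_hal_sketch = {
    .method_is_valid  = uvm_hal_ampere_ce_method_is_valid_c6b5,
    .memcopy_is_valid = uvm_hal_ampere_ce_memcopy_is_valid_c6b5,
    .memset_is_valid  = uvm_hal_ampere_ce_memset_is_valid_c6b5,
};

static struct uvm_ce_hal_struct hopper_ce_hal_sketch = {
    // Assumption for the example: Hopper keeps its own copy/memset checks and
    // skips per-method validation via the always-true stub.
    .method_is_valid  = uvm_hal_method_is_valid_stub,
    .memcopy_is_valid = uvm_hal_hopper_ce_memcopy_is_valid,
    .memset_is_valid  = uvm_hal_hopper_ce_memset_is_valid,
};
```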
File diff suppressed because it is too large
@@ -1,5 +1,5 @@

/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -37,19 +37,10 @@ typedef struct

// This stores pointers to uvm_va_block_t for HMM blocks.
uvm_range_tree_t blocks;
uvm_mutex_t blocks_lock;

// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// This flag is set true by default for each va_space so most processes
// don't see partially implemented UVM-HMM behavior but can be enabled by
// test code for a given va_space so the test process can do some interim
// testing. It needs to be a separate flag instead of modifying
// uvm_disable_hmm or va_space->flags since those are user inputs and are
// visible/checked by test code.
// Remove this when UVM-HMM is fully integrated into chips_a.
bool disable;
} uvm_hmm_va_space_t;

#if UVM_IS_CONFIG_HMM()

// Tells whether HMM is enabled for the given va_space.
// If it is not enabled, all of the functions below are no-ops.
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space);
@@ -62,15 +53,25 @@ typedef struct

// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);

// Initialize HMM for the given the va_space for testing.
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
// the UVM driver. Remove this when enough HMM functionality is implemented.
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);

// Destroy any HMM state for the given the va_space.
// Locking: va_space lock must be held in write mode.
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space);

// Unmap all page tables in this VA space which map memory owned by this
// GPU. Any memory still resident on this GPU will be evicted to system
// memory. Note that 'mm' can be NULL (e.g., when closing the UVM file)
// in which case any GPU memory is simply freed.
// Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
// least read mode and the va_space lock must be held in write mode.
void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm);

// Destroy the VA space's mappings on the GPU, if it has any.
// Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
// least read mode and the va_space lock must be held in write mode.
void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm);

// Find an existing HMM va_block.
// This function can be called without having retained and locked the mm,
// but in that case, the only allowed operations on the returned block are
@@ -83,10 +84,33 @@ typedef struct
NvU64 addr,
uvm_va_block_t **va_block_ptr);

// Find an existing HMM va_block when processing a CPU fault and try to
// isolate and lock the faulting page.
// Return NV_ERR_INVALID_ADDRESS if the block is not found,
// NV_ERR_BUSY_RETRY if the page could not be locked, and
// NV_OK if the block is found and the page is locked. Also,
// uvm_hmm_cpu_fault_finish() must be called if NV_OK is returned.
// Locking: This must be called with the vma->vm_mm locked and the va_space
// read locked.
NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
uvm_service_block_context_t *service_context,
struct vm_fault *vmf,
uvm_va_block_t **va_block_ptr);

// This must be called after uvm_va_block_cpu_fault() if
// uvm_hmm_va_block_cpu_find() returns NV_OK.
// Locking: This must be called with the vma->vm_mm locked and the va_space
// read locked.
void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context);

// Find or create a new HMM va_block.
//
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
// address 'addr' or the VMA does not have at least PROT_READ permission.
// The caller is also responsible for checking that there is no UVM
// va_range covering the given address before calling this function.
// If va_block_context is not NULL, the VMA is cached in
// va_block_context->hmm.vma.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock at least for read.
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
@@ -94,23 +118,53 @@ typedef struct
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr);

// Find the VMA for the given address and set va_block_context->hmm.vma.
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
// is no VMA associated with the address 'addr' or the VMA does not have at
// least PROT_READ permission.
// Locking: This function must be called with mm retained and locked for
// at least read or mm equal to NULL.
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);

// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
// not NULL and covers the given region. This always returns true and is
// intended to only be used with UVM_ASSERT().
// Locking: This function must be called with the va_block lock held and if
// va_block is a HMM block, va_block_context->mm must be retained and
// locked for at least read.
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);

// Find or create a HMM va_block and mark it so the next va_block split
// will fail for testing purposes.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock at least for read.
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr);

// Reclaim any HMM va_blocks that overlap the given range.
// Note that 'end' is inclusive.
// A HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
// See uvm_hmm_vma_is_valid() for details.
// Note that 'end' is inclusive. If mm is NULL, any HMM va_block in the
// range will be reclaimed which assumes that the mm is being torn down
// and was not retained.
// Return values:
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
// NV_OK: There were no HMM blocks in the range, or all HMM
// blocks in the range were successfully reclaimed.
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
// must hold a reference on it, and it must be locked for at least read
// mode. Also, the va_space lock must be held in write mode.
// must retain it with uvm_va_space_mm_or_current_retain() or be sure that
// mm->mm_users is not zero, and it must be locked for at least read mode.
// Also, the va_space lock must be held in write mode.
// TODO: Bug 3372166: add asynchronous va_block reclaim.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end);

// This is called to update the va_space tree of HMM va_blocks after an
// existing va_block is split.
// Locking: the va_space lock must be held in write mode.
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block);

// Find a HMM policy range that needs to be split. The callback function
// 'split_needed_cb' returns true if the policy range needs to be split.
// If a policy range is split, the existing range is truncated to
@@ -132,7 +186,8 @@ typedef struct
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address);
NvU64 last_address,
uvm_tracker_t *out_tracker);

// Set the accessed by policy for the given range. This also tries to
// map the range. Note that 'last_address' is inclusive.
@@ -142,13 +197,23 @@ typedef struct
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address);
NvU64 last_address,
uvm_tracker_t *out_tracker);

// Deferred work item to reestablish accessed by mappings after eviction. On
// GPUs with access counters enabled, the evicted GPU will also get remote
// mappings.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
// and the va_space lock must be held in at least read mode.
void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_va_block_t *va_block,
uvm_va_block_context_t *block_context);

// Set the read duplication policy for the given range.
// Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
// TODO: Bug 2046423: need to implement read duplication support in Linux.
// TODO: Bug 3660922: need to implement HMM read duplication support.
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
@@ -159,10 +224,11 @@ typedef struct
return NV_OK;
}

// Set va_block_context->policy to the policy covering the given address
// 'addr' and update the ending address '*endp' to the minimum of *endp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// This function assigns va_block_context->policy to the policy covering
// the given address 'addr' and assigns the ending address '*endp' to the
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
// ending address of the policy range. Note that va_block_context->hmm.vma
// is expected to be initialized before calling this function.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
// the va_block lock held.
@@ -171,11 +237,11 @@ typedef struct
unsigned long addr,
NvU64 *endp);

// Find the VMA for the page index 'page_index',
// set va_block_context->policy to the policy covering the given address,
// and update the ending page range '*outerp' to the minimum of *outerp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// This function finds the VMA for the page index 'page_index' and assigns
// it to va_block_context->vma, sets va_block_context->policy to the policy
// covering the given address, and sets the ending page range '*outerp'
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
// ending address of the policy range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
@@ -189,6 +255,145 @@ typedef struct
// Locking: va_space lock must be held in write mode.
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space);

// Return the expanded region around 'address' limited to the intersection
// of va_block start/end, vma start/end, and policy start/end.
// va_block_context must not be NULL, va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
// va_block_context->policy must be valid.
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
// va_space lock must be held in at least read mode, and the va_block lock
// held.
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address);

// Return the logical protection allowed of a HMM va_block for the page at
// the given address.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
// read mode.
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 addr);

// This is called to service a GPU fault.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked,
// the va_space read lock must be held, and the va_block lock held.
NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_processor_id_t new_residency,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_service_block_context_t *service_context);

// This is called to migrate a region within a HMM va_block.
// va_block_context must not be NULL and va_block_context->policy and
// va_block_context->hmm.vma must be valid.
// Locking: the va_block_context->mm must be retained, mmap_lock must be
// locked, and the va_block lock held.
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
uvm_make_resident_cause_t cause);

// This is called to migrate an address range of HMM allocations via
// UvmMigrate().
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy or va_block_context->hmm.vma.
//
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
// the va_space read lock must be held.
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
uvm_va_block_context_t *va_block_context,
NvU64 base,
NvU64 length,
uvm_processor_id_t dest_id,
uvm_migrate_mode_t mode,
uvm_tracker_t *out_tracker);

// This sets the va_block_context->hmm.src_pfns[] to the ZONE_DEVICE private
// PFN for the GPU chunk memory.
NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_gpu_chunk_t *gpu_chunk,
uvm_va_block_region_t chunk_region);

// Migrate pages to system memory for the given page mask.
// Note that the mmap lock is not held and there is no MM retained.
// This must be called after uvm_hmm_va_block_evict_chunk_prep() has
// initialized va_block_context->hmm.src_pfns[] for the source GPU physical
// PFNs being migrated. Note that the input mask 'pages_to_evict' can be
// modified. If any of the evicted pages has the accessed by policy set,
// then record that by setting out_accessed_by_set.
// Locking: the va_block lock must be locked.
NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
const uvm_page_mask_t *pages_to_evict,
uvm_va_block_region_t region,
bool *out_accessed_by_set);

// Migrate pages from the given GPU to system memory for the given page
// mask and region. va_block_context must not be NULL.
// Note that the mmap lock is not held and there is no MM retained.
// Locking: the va_block lock must be locked.
NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
uvm_gpu_t *gpu,
uvm_va_block_context_t *va_block_context,
const uvm_page_mask_t *pages_to_evict,
uvm_va_block_region_t region);

// Migrate a GPU chunk to system memory. This called to remove CPU page
// table references to device private struct pages for the given GPU after
// all other references in va_blocks have been released and the GPU is
// in the process of being removed/torn down. Note that there is no mm,
// VMA, va_block or any user channel activity on this GPU.
NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
uvm_gpu_chunk_t *gpu_chunk);

// This returns what would be the intersection of va_block start/end and
// VMA start/end-1 for the given 'lookup_address' if
// uvm_hmm_va_block_find_create() was called.
// Locking: the caller must hold mm->mmap_lock in at least read mode and
// the va_space lock must be held in at least read mode.
NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 lookup_address,
NvU64 *startp,
NvU64 *endp,
UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params);

// This updates the HMM va_block CPU residency information for a single
// page at 'lookup_address' by calling hmm_range_fault(). If 'populate' is
// true, the CPU page will be faulted in read/write or read-only
// (depending on the permission of the underlying VMA at lookup_address).
// Locking: the caller must hold mm->mmap_lock in at least read mode and
// the va_space lock must be held in at least read mode.
NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
struct mm_struct *mm,
NvU64 lookup_address,
bool populate);

NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
struct file *filp);

NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
struct mm_struct *mm,
UVM_TEST_VA_RANGE_INFO_PARAMS *params);

// Return true if GPU fault new residency location should be system memory.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is
// implemented for VMAs other than anonymous memory.
// Locking: the va_block lock must be held. If the va_block is a HMM
// va_block, the va_block_context->mm must be retained and locked for least
// read.
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);

#else // UVM_IS_CONFIG_HMM()

static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@@ -206,12 +411,17 @@ typedef struct
return NV_OK;
}

static NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
{
return NV_WARN_NOTHING_TO_DO;
}

static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
static void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm)
{
}

static void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm)
{
}

@@ -222,6 +432,18 @@ typedef struct
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
uvm_service_block_context_t *service_context,
struct vm_fault *vmf,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}

static void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context)
{
}

static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
@@ -230,6 +452,23 @@ typedef struct
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
{
return NV_OK;
}

static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region)
{
return true;
}

static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
{
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
@@ -238,6 +477,10 @@ typedef struct
return NV_OK;
}

static void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
{
}

static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
@@ -249,7 +492,8 @@ typedef struct
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address)
NvU64 last_address,
uvm_tracker_t *out_tracker)
{
return NV_ERR_INVALID_ADDRESS;
}
@@ -258,11 +502,18 @@ typedef struct
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address)
NvU64 last_address,
uvm_tracker_t *out_tracker)
{
return NV_ERR_INVALID_ADDRESS;
}

static void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_va_block_t *va_block,
uvm_va_block_context_t *block_context)
{
}

static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
@@ -291,6 +542,119 @@ typedef struct
return NV_OK;
}

static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address)
{
return (uvm_va_block_region_t){};
}

static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 addr)
{
return UVM_PROT_NONE;
}

static NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_processor_id_t new_residency,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_service_block_context_t *service_context)
{
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
uvm_make_resident_cause_t cause)
{
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
uvm_va_block_context_t *va_block_context,
NvU64 base,
NvU64 length,
uvm_processor_id_t dest_id,
uvm_migrate_mode_t mode,
uvm_tracker_t *out_tracker)
{
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_gpu_chunk_t *gpu_chunk,
uvm_va_block_region_t chunk_region)
{
return NV_OK;
}

static NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
const uvm_page_mask_t *pages_to_evict,
uvm_va_block_region_t region,
bool *out_accessed_by_set)
{
return NV_OK;
}

static NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
uvm_gpu_t *gpu,
uvm_va_block_context_t *va_block_context,
const uvm_page_mask_t *pages_to_evict,
uvm_va_block_region_t region)
{
return NV_OK;
}

static NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
uvm_gpu_chunk_t *gpu_chunk)
{
return NV_OK;
}

static NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 lookup_address,
NvU64 *startp,
NvU64 *endp,
UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params)
{
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
struct mm_struct *mm,
NvU64 lookup_address,
bool populate)
{
return NV_ERR_INVALID_ADDRESS;
}

static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
struct file *filp)
{
return NV_ERR_INVALID_STATE;
}

static NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
struct mm_struct *mm,
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
{
return NV_ERR_INVALID_ADDRESS;
}

static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context)
{
return false;
}

#endif // UVM_IS_CONFIG_HMM()

#endif // _UVM_HMM_H_
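The `#else` branch above supplies inline no-op or error-returning stubs so callers never need `UVM_IS_CONFIG_HMM()` checks at the call site. A minimal sketch of that calling pattern; the wrapper function is illustrative, not driver code:

```c
// Sketch: a fault-servicing path can call the HMM API unconditionally.
static NV_STATUS find_hmm_block_sketch(uvm_va_space_t *va_space,
                                       NvU64 addr,
                                       uvm_va_block_context_t *ctx,
                                       uvm_va_block_t **block)
{
    // With HMM compiled out, this resolves to the stub above and simply
    // returns NV_ERR_INVALID_ADDRESS, so the caller can fall back to the
    // managed (uvm_va_range_t) path without any #ifdef.
    return uvm_hmm_va_block_find_create(va_space, addr, ctx, block);
}
```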
@@ -1,90 +0,0 @@

/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_va_range.h"
#include "uvm_hmm.h"

NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
struct mm_struct *mm;
uvm_va_block_t *hmm_block = NULL;
NV_STATUS status;

mm = uvm_va_space_mm_retain(va_space);
if (!mm)
return NV_WARN_NOTHING_TO_DO;

uvm_down_write_mmap_lock(mm);
uvm_va_space_down_write(va_space);

// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// By default, HMM is enabled system wide but disabled per va_space.
// This will initialize the va_space for HMM.
status = uvm_hmm_va_space_initialize_test(va_space);
if (status != NV_OK)
goto out;

uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);

uvm_down_read_mmap_lock(mm);
uvm_va_space_down_read(va_space);

// Try to create an HMM va_block to virtual address zero (NULL).
// It should fail. There should be no VMA but a va_block for range
// [0x0 0x1fffff] is possible.
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);

// Try to create an HMM va_block which overlaps a UVM managed block.
// It should fail.
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);

// Try to create an HMM va_block; it should succeed.
status = uvm_hmm_va_block_find_create(va_space, params->hmm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_OK, done);

// Try to find an existing HMM va_block; it should succeed.
status = uvm_hmm_va_block_find(va_space, params->hmm_address, &hmm_block);
TEST_CHECK_GOTO(status == NV_OK, done);

done:
uvm_va_space_up_read(va_space);
uvm_up_read_mmap_lock(mm);
uvm_va_space_mm_release(va_space);

return status;

out:
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_va_space_mm_release(va_space);

return status;
}
@@ -54,6 +54,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

// All GR context buffers may be mapped to 57b wide VAs. All "compute" units

@@ -1,5 +1,5 @@

/*******************************************************************************
Copyright (c) 2020 NVIDIA Corporation
Copyright (c) 2020-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -23,25 +23,9 @@

#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_mem.h"
#include "clc8b5.h"

static void hopper_membar_after_transfer(uvm_push_t *push)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);

if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
return;

// TODO: [UVM-Volta] Remove Host WFI + Membar WAR for CE flush-only bug
// http://nvbugs/1734761
gpu->parent->host_hal->wait_for_idle(push);

if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
gpu->parent->host_hal->membar_gpu(push);
else
gpu->parent->host_hal->membar_sys(push);
}

static NvU32 ce_aperture(uvm_aperture_t aperture)
{
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
@@ -78,45 +62,32 @@ void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}

// Perform an appropriate membar before a semaphore operation. Returns whether
// the semaphore operation should include a flush.
static bool hopper_membar_before_semaphore(uvm_push_t *push)
// Return the flush type and the flush enablement.
static NvU32 hopper_get_flush_value(uvm_push_t *push)
{
uvm_gpu_t *gpu;
NvU32 flush_value;
uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);

if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
if (membar == UVM_MEMBAR_NONE) {
// No MEMBAR requested, don't use a flush.
return false;
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
}
else {
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);

if (membar == UVM_MEMBAR_GPU)
flush_value |= HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, GL);
else
flush_value |= HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, SYS);
}

if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
// By default do a MEMBAR SYS and for that we can just use flush on the
// semaphore operation.
return true;
}

// TODO: Bug 1734761: Remove the HOST WFI+membar WAR, i.e, perform the CE
// flush when MEMBAR GPU is requested.
gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
gpu->parent->host_hal->membar_gpu(push);

return false;
return flush_value;
}

void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;

use_flush = hopper_membar_before_semaphore(push);

if (use_flush)
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
@@ -124,7 +95,7 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p

launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
launch_dma_plc_mode);
@@ -133,16 +104,7 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;

use_flush = hopper_membar_before_semaphore(push);

if (use_flush)
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
@@ -150,7 +112,7 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N

launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
@@ -162,16 +124,7 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
uvm_gpu_t *gpu;
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;

use_flush = hopper_membar_before_semaphore(push);

if (use_flush)
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
@@ -180,7 +133,7 @@ void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
gpu = uvm_push_get_gpu(push);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE) |
launch_dma_plc_mode);
@@ -218,8 +171,9 @@ static void hopper_memset_common(uvm_push_t *push,
NvU32 launch_dma_plc_mode;
NvU32 launch_dma_remap_enable;
NvU32 launch_dma_scrub_enable;
NvU32 flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, memset_element_size),
"Memset validation failed in channel %s, GPU %s",
push->channel->name,
uvm_gpu_name(gpu));
@@ -252,6 +206,10 @@ static void hopper_memset_common(uvm_push_t *push,
do {
NvU32 memset_this_time = (NvU32)min(num_elements, max_single_memset);

// In the last operation, a flush/membar may be issued after the memset.
if (num_elements == memset_this_time)
flush_value = hopper_get_flush_value(push);

gpu->parent->ce_hal->offset_out(push, dst.address);

NV_PUSH_1U(C8B5, LINE_LENGTH_IN, memset_this_time);
@@ -260,7 +218,7 @@ static void hopper_memset_common(uvm_push_t *push,
HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
flush_value |
launch_dma_remap_enable |
launch_dma_scrub_enable |
launch_dma_dst_type |
@@ -269,10 +227,8 @@ static void hopper_memset_common(uvm_push_t *push,

dst.address += memset_this_time * memset_element_size;
num_elements -= memset_this_time;
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
} while (num_elements > 0);

hopper_membar_after_transfer(push);
}

void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
@@ -337,3 +293,16 @@ void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 v

hopper_memset_common(push, dst, size, 4);
}

bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
return true;
}

bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
return true;
}
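The rework above replaces `hopper_membar_before_semaphore()` with `hopper_get_flush_value()`, folding the push's membar flag directly into the LAUNCH_DMA word instead of issuing a separate Host WFI plus membar for UVM_MEMBAR_GPU. A condensed sketch of that mapping, reusing only the C8B5 constants quoted in the hunk; this is an illustration, not a verbatim excerpt:

```c
// Sketch: how a requested membar maps onto the LAUNCH_DMA flush fields.
static NvU32 flush_value_sketch(uvm_membar_t membar)
{
    switch (membar) {
        case UVM_MEMBAR_NONE:
            // No ordering requested, so no flush on the semaphore release.
            return HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
        case UVM_MEMBAR_GPU:
            // GPU-scope ordering via the GL flush type.
            return HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
                   HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, GL);
        default: // UVM_MEMBAR_SYS
            // System-scope ordering via the SYS flush type.
            return HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
                   HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, SYS);
    }
}
```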
@@ -34,31 +34,6 @@
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//

#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);

#if defined(NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK)
//
// The .tree_lock member variable was changed from type rwlock_t, to
// spinlock_t, on 25 July 2008, by mainline commit
// 19fd6231279be3c3bdd02ed99f9b0eb195978064.
//
rwlock_init(&mapping->tree_lock);
#else
spin_lock_init(&mapping->tree_lock);
#endif

spin_lock_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif

#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{

@@ -88,7 +88,7 @@

#include "nv-kthread-q.h"

#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
#if defined(NV_CPUMASK_OF_NODE_PRESENT)
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#else
#define UVM_THREAD_AFFINITY_SUPPORTED() 0
@@ -108,7 +108,7 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#endif
}

#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT)
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_DEVICE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else
#define UVM_IS_CONFIG_HMM() 0
@@ -136,8 +136,8 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#endif

// See bug 1707453 for further details about setting the minimum kernel version.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
# error This driver does not support kernels older than 2.6.32!
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
# error This driver does not support kernels older than 3.10!
#endif

#if !defined(VM_RESERVED)
@@ -217,10 +217,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)

#define NV_UVM_GFP_FLAGS (GFP_KERNEL)

#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping);
#endif

// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,
@@ -352,23 +348,6 @@ static inline NvU64 NV_GETTIME(void)
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif

// bitmap_clear was added in 2.6.33 via commit c1a2a962a2ad103846e7950b4591471fabecece7
#if !defined(NV_BITMAP_CLEAR_PRESENT)
static inline void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_set_bit_from(index, map, start + len)
__clear_bit(index, map);
}

static inline void bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_clear_bit_from(index, map, start + len)
__set_bit(index, map);
}
#endif

// Added in 2.6.24
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
@@ -425,6 +404,7 @@ static inline NvU64 NV_GETTIME(void)
// 654672d4ba1a6001c365833be895f9477c4d5eab ("locking/atomics:
// Add _{acquire|release|relaxed}() variants of some atomic operations") in v4.3
// (2015-08-06).
// TODO: Bug 3849079: We always use this definition on newer kernels.
#ifndef atomic_read_acquire
#define atomic_read_acquire(p) smp_load_acquire(&(p)->counter)
#endif
@@ -433,23 +413,30 @@ static inline NvU64 NV_GETTIME(void)
#define atomic_set_release(p, v) smp_store_release(&(p)->counter, v)
#endif

// atomic_long_read_acquire and atomic_long_set_release were added in commit
// b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb ("locking/atomics: Switch to
// generated atomic-long") in v5.1 (2019-05-05).
// TODO: Bug 3849079: We always use these definitions on newer kernels.
#define atomic_long_read_acquire uvm_atomic_long_read_acquire
static inline long uvm_atomic_long_read_acquire(atomic_long_t *p)
{
long val = atomic_long_read(p);
smp_mb();
return val;
}

#define atomic_long_set_release uvm_atomic_long_set_release
static inline void uvm_atomic_long_set_release(atomic_long_t *p, long v)
{
smp_mb();
atomic_long_set(p, v);
}
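The `uvm_atomic_long_read_acquire()`/`uvm_atomic_long_set_release()` wrappers above emulate acquire/release ordering with full barriers on kernels that predate the generated atomic-long helpers. A small usage sketch, assuming a hypothetical published object type; this is illustrative and not taken from the driver:

```c
#include <linux/atomic.h>

struct payload { int ready_value; };        // hypothetical example type

static atomic_long_t g_published_ptr;       // example variable, not driver code

static void publisher(struct payload *p)
{
    p->ready_value = 42;                                   // initialize first
    atomic_long_set_release(&g_published_ptr, (long)p);    // then publish
}

static struct payload *consumer(void)
{
    // The acquire read pairs with the release store above, so a non-NULL
    // pointer implies ready_value is visible to this CPU.
    return (struct payload *)atomic_long_read_acquire(&g_published_ptr);
}
```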
// Added in 3.11
#ifndef PAGE_ALIGNED
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
#endif

// Added in 2.6.37 via commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
#if !defined(NV_VZALLOC_PRESENT)
static inline void *vzalloc(unsigned long size)
{
void *p = vmalloc(size);
if (p)
memset(p, 0, size);
return p;
}
#endif

// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
@@ -505,21 +492,6 @@ static bool radix_tree_empty(struct radix_tree_root *tree)
#endif
#endif

#if !defined(NV_USLEEP_RANGE_PRESENT)
static void __sched usleep_range(unsigned long min, unsigned long max)
{
unsigned min_msec = min / 1000;
unsigned max_msec = max / 1000;

if (min_msec != 0)
msleep(min_msec);
else if (max_msec != 0)
msleep(max_msec);
else
msleep(1);
}
#endif

typedef struct
{
struct mem_cgroup *new_memcg;

@@ -334,7 +334,7 @@ bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *uvm_context)
return false;
}

bool __uvm_thread_check_all_unlocked()
bool __uvm_thread_check_all_unlocked(void)
{
return __uvm_check_all_unlocked(uvm_thread_context_lock_get());
}
@@ -75,26 +75,29 @@
|
||||
//
|
||||
// Protects:
|
||||
// - gpu->parent->isr.replayable_faults.service_lock:
|
||||
// Changes to the state of a GPU as it transitions from top-half to bottom-half
|
||||
// interrupt handler for replayable faults. This lock is acquired for that GPU,
|
||||
// in the ISR top-half. Then a bottom-half is scheduled (to run in a workqueue).
|
||||
// Then the bottom-half releases the lock when that GPU's processing appears to
|
||||
// be done.
|
||||
// Changes to the state of a GPU as it transitions from top-half to
|
||||
// bottom-half interrupt handler for replayable faults. This lock is
|
||||
// acquired for that GPU, in the ISR top-half. Then a bottom-half is
|
||||
// scheduled (to run in a workqueue). Then the bottom-half releases the
|
||||
// lock when that GPU's processing appears to be done.
|
||||
//
|
||||
// - gpu->parent->isr.non_replayable_faults.service_lock:
|
||||
// Changes to the state of a GPU in the bottom-half for non-replayable faults.
|
||||
// Non-replayable faults are handed-off from RM instead of directly from the GPU
|
||||
// hardware. This means that we do not keep receiving interrupts after RM pops
|
||||
// out the faults from the HW buffer. In order not to miss fault notifications,
|
||||
// we will always schedule a bottom-half for non-replayable faults if there are
|
||||
// faults ready to be consumed in the buffer, even if there already is some
|
||||
// bottom-half running or scheduled. This lock serializes all scheduled bottom
|
||||
// halves per GPU which service non-replayable faults.
|
||||
// Changes to the state of a GPU in the bottom-half for non-replayable
|
||||
// faults. Non-replayable faults are handed-off from RM instead of
|
||||
// directly from the GPU hardware. This means that we do not keep
|
||||
// receiving interrupts after RM pops out the faults from the HW buffer.
|
||||
// In order not to miss fault notifications, we will always schedule a
|
||||
// bottom-half for non-replayable faults if there are faults ready to be
|
||||
// consumed in the buffer, even if there already is some bottom-half
|
||||
// running or scheduled. This lock serializes all scheduled bottom halves
|
||||
// per GPU which service non-replayable faults.
|
||||
//
|
||||
// - gpu->parent->isr.access_counters.service_lock:
|
||||
// Changes to the state of a GPU as it transitions from top-half to bottom-half
|
||||
// interrupt handler for access counter notifications. This lock is acquired for
|
||||
// that GPU, in the ISR top-half. Then a bottom-half is scheduled (to run in a
|
||||
// workqueue). Then the bottom-half releases the lock when that GPU's processing
|
||||
// appears to be done.
|
||||
// Changes to the state of a GPU as it transitions from top-half to
|
||||
// bottom-half interrupt handler for access counter notifications. This
|
||||
// lock is acquired for that GPU, in the ISR top-half. Then a bottom-half
|
||||
// is scheduled (to run in a workqueue). Then the bottom-half releases
|
||||
// the lock when that GPU's processing appears to be done.
|
||||
//
|
||||
// - mmap_lock (mmap_sem in kernels < 5.8)
|
||||
// Order: UVM_LOCK_ORDER_MMAP_LOCK
|
||||
@@ -337,7 +340,11 @@
|
||||
//
|
||||
// - Channel lock
|
||||
// Order: UVM_LOCK_ORDER_CHANNEL
|
||||
// Spinlock (uvm_spinlock_t)
|
||||
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
|
||||
//
|
||||
// Lock protecting the state of all the channels in a channel pool. The
|
||||
// channel pool lock documentation contains the guidelines about which lock
|
||||
// type (mutex or spinlock) to use.
|
||||
//
|
||||
// - Tools global VA space list lock (g_tools_va_space_list_lock)
|
||||
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
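
The service_lock hand-off described in the ISR entries earlier in this lock-ordering comment follows the usual Linux pattern of a top-half taking a lock-like object that the scheduled bottom-half later releases. A hedged, generic sketch of that pattern only (this is not the UVM implementation; the semaphore, work item, and handler names are invented for illustration):

// Generic ISR top-half / workqueue bottom-half hand-off sketch.
#include <linux/interrupt.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>

static struct semaphore service_lock = __SEMAPHORE_INITIALIZER(service_lock, 1);

static void service_bottom_half(struct work_struct *work)
{
    /* ... service the pending notifications for this GPU ... */
    up(&service_lock);                    /* released by the bottom half */
}

static DECLARE_WORK(service_work, service_bottom_half);

static irqreturn_t service_top_half(int irq, void *dev)
{
    if (down_trylock(&service_lock))      /* taken in the ISR top half */
        return IRQ_HANDLED;               /* a bottom half is already in flight */
    schedule_work(&service_work);         /* bottom half runs in a workqueue */
    return IRQ_HANDLED;
}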

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -106,6 +106,9 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
pte_buffer->mapping_info.formatType = map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
pte_buffer->mapping_info.mappingPageSize = page_size;

pte_buffer->page_size = page_size;
pte_buffer->pte_size = uvm_mmu_pte_size(tree, page_size);
num_all_ptes = uvm_div_pow2_64(length, page_size);
@@ -341,9 +344,8 @@ static NV_STATUS map_rm_pt_range(uvm_page_tree_t *tree,
static uvm_membar_t va_range_downgrade_membar(uvm_va_range_t *va_range, uvm_ext_gpu_map_t *ext_gpu_map)
{
if (va_range->type == UVM_VA_RANGE_TYPE_CHANNEL) {
if (va_range->channel.aperture == UVM_APERTURE_VID)
return UVM_MEMBAR_GPU;
return UVM_MEMBAR_SYS;
return uvm_hal_downgrade_membar_type(va_range->channel.gpu_va_space->gpu,
va_range->channel.aperture == UVM_APERTURE_VID);
}

// If there is no mem_handle, this is a sparse mapping.
@@ -353,9 +355,8 @@ static uvm_membar_t va_range_downgrade_membar(uvm_va_range_t *va_range, uvm_ext_
if (!ext_gpu_map->mem_handle)
return UVM_MEMBAR_GPU;

if (ext_gpu_map->is_sysmem || ext_gpu_map->gpu != ext_gpu_map->owning_gpu)
return UVM_MEMBAR_SYS;
return UVM_MEMBAR_GPU;
return uvm_hal_downgrade_membar_type(ext_gpu_map->gpu,
!ext_gpu_map->is_sysmem && ext_gpu_map->gpu == ext_gpu_map->owning_gpu);
}
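
Both branches above now delegate to uvm_hal_downgrade_membar_type() with a boolean that says whether the downgrade only touched GPU-local vidmem. A hedged reading of that helper's contract, inferred purely from the inline branches it replaces in this diff (the real implementation may consult additional GPU properties):

// Sketch only: what the call sites above imply about the helper's behavior.
static uvm_membar_t downgrade_membar_type_sketch(uvm_gpu_t *gpu, bool is_local_vidmem)
{
    // Downgrades that touched only GPU-local memory can use a GPU-scope
    // membar; anything potentially visible over the system bus needs SYS.
    return is_local_vidmem ? UVM_MEMBAR_GPU : UVM_MEMBAR_SYS;
}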

NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
@@ -398,9 +399,7 @@ NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,

page_tree = &gpu_va_space->page_tables;

// Verify that the GPU VA space supports this page size
if ((mem_info->pageSize & page_tree->hal->page_sizes()) == 0)
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_mmu_page_size_supported(page_tree, mem_info->pageSize));

if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL) {
// We should be never called with ext_gpu_map == NULL
@@ -414,13 +413,12 @@ NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
pt_range_vec = &va_range->channel.pt_range_vec;
}

if (!IS_ALIGNED(map_offset, mem_info->pageSize) ||
map_offset + uvm_range_tree_node_size(node) > mem_info->size)
if (map_offset + uvm_range_tree_node_size(node) > mem_info->size)
return NV_ERR_INVALID_OFFSET;

// Consolidate input checks for API-level callers
if (!IS_ALIGNED(node->start, mem_info->pageSize) || !IS_ALIGNED(node->end + 1, mem_info->pageSize))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(IS_ALIGNED(node->start, mem_info->pageSize) &&
IS_ALIGNED(node->end + 1, mem_info->pageSize) &&
IS_ALIGNED(map_offset, mem_info->pageSize));

status = uvm_pte_buffer_init(va_range,
mapping_gpu,
@@ -605,7 +603,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
return NV_ERR_INVALID_ADDRESS;

// The mm needs to be locked in order to remove stale HMM va_blocks.
mm = uvm_va_space_mm_retain_lock(va_space);
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
uvm_va_space_down_write(va_space);

// Create the new external VA range.
@@ -619,7 +617,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
}

uvm_va_space_up_write(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
return status;
}

@@ -636,6 +634,11 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
{
uvm_gpu_t *owning_gpu;

if (!mem_info->deviceDescendant && !mem_info->sysmem) {
ext_gpu_map->owning_gpu = NULL;
ext_gpu_map->is_sysmem = false;
return NV_OK;
}
// This is a local or peer allocation, so the owning GPU must have been
// registered.
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
@@ -840,6 +843,10 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
uvm_ext_gpu_map_t *ext_gpu_map = NULL;
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
UvmGpuMemoryInfo mem_info;
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
NvU32 mapping_page_size;
NvU64 alignments;
NvU32 smallest_alignment;
NV_STATUS status;

uvm_assert_rwsem_locked_read(&va_space->lock);
@@ -910,12 +917,25 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
if (status != NV_OK)
goto error;

status = uvm_va_range_map_rm_allocation(va_range,
mapping_gpu,
&mem_info,
map_rm_params,
ext_gpu_map,
out_tracker);
// Determine the proper mapping page size.
// This will be the largest supported page size less than or equal to the
// smallest of the base VA address, length, offset, and allocation page size
// alignments.
alignments = mem_info.pageSize | base | length | map_rm_params->map_offset;
smallest_alignment = alignments & ~(alignments - 1);

// Check that alignment bits did not get truncated.
UVM_ASSERT(smallest_alignment);

mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables, smallest_alignment);
if (!mapping_page_size) {
status = NV_ERR_INVALID_ADDRESS;
goto error;
}

mem_info.pageSize = mapping_page_size;

status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
if (status != NV_OK)
goto error;
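
The page-size selection above keys off the lowest set bit of the OR of every quantity that must stay aligned. A standalone worked example of the same computation (plain C, not driver code; the page-size list and input values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    // Hypothetical supported page sizes: 4K, 64K, 2M.
    const uint64_t page_sizes[] = { 1ull << 12, 1ull << 16, 1ull << 21 };
    const uint64_t base = 0x7f1200010000ull;   // VA of the mapping
    const uint64_t length = 0x30000;           // 192K
    const uint64_t map_offset = 0x10000;       // 64K
    const uint64_t alloc_page_size = 1ull << 16;

    // OR the constraints together; the lowest set bit is the smallest alignment.
    uint64_t alignments = alloc_page_size | base | length | map_offset;
    uint64_t smallest_alignment = alignments & ~(alignments - 1);   // 0x10000 here

    // "Biggest page size up to" that alignment.
    uint64_t mapping_page_size = 0;
    for (size_t i = 0; i < sizeof(page_sizes) / sizeof(page_sizes[0]); i++) {
        if (page_sizes[i] <= smallest_alignment)
            mapping_page_size = page_sizes[i];
    }

    printf("smallest alignment 0x%llx -> mapping page size 0x%llx\n",
           (unsigned long long)smallest_alignment,
           (unsigned long long)mapping_page_size);   // both print 0x10000
    return 0;
}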

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -50,38 +50,12 @@ void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 o
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}

// Perform an appropriate membar before a semaphore operation. Returns whether
// the semaphore operation should include a flush.
static bool maxwell_membar_before_semaphore(uvm_push_t *push)
{
uvm_gpu_t *gpu;

if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
// No MEMBAR requested, don't use a flush.
return false;
}

if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
// By default do a MEMBAR SYS and for that we can just use flush on the
// semaphore operation.
return true;
}

// MEMBAR GPU requested, do it on the HOST and skip the CE flush as CE
// doesn't have this capability.
gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
gpu->parent->host_hal->membar_gpu(push);

return false;
}

void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 flush_value;
bool use_flush;

use_flush = maxwell_membar_before_semaphore(push);
use_flush = uvm_hal_membar_before_semaphore(push);

if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
@@ -102,7 +76,7 @@ void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va,
NvU32 flush_value;
bool use_flush;

use_flush = maxwell_membar_before_semaphore(push);
use_flush = uvm_hal_membar_before_semaphore(push);

if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
@@ -126,7 +100,7 @@ void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
NvU32 flush_value;
bool use_flush;

use_flush = maxwell_membar_before_semaphore(push);
use_flush = uvm_hal_membar_before_semaphore(push);

if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
@@ -221,10 +195,9 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
NvU32 pipelined_value;
NvU32 launch_dma_src_dst_type;
NvU32 launch_dma_plc_mode;
bool first_operation = true;

UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_validate(push, dst, src),
"Memcopy validation failed in channel %s, GPU %s",
UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_is_valid(push, dst, src),
"Memcopy validation failed in channel %s, GPU %s.\n",
push->channel->name,
uvm_gpu_name(gpu));

@@ -233,14 +206,14 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);

do {
NvU32 copy_this_time = (NvU32)min(size, max_single_copy_size);

if (first_operation && uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);

gpu->parent->ce_hal->offset_in_out(push, src.address, dst.address);

NV_PUSH_1U(B0B5, LINE_LENGTH_IN, copy_this_time);
@@ -255,10 +228,10 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
launch_dma_plc_mode |
pipelined_value);

pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
dst.address += copy_this_time;
src.address += copy_this_time;
size -= copy_this_time;
first_operation = false;
} while (size > 0);

maxwell_membar_after_transfer(push);
@@ -266,11 +239,14 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu

void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, size_t size)
{
uvm_hal_maxwell_ce_memcopy(push, uvm_gpu_address_virtual(dst_va), uvm_gpu_address_virtual(src_va), size);
uvm_push_get_gpu(push)->parent->ce_hal->memcopy(push,
uvm_gpu_address_virtual(dst_va),
uvm_gpu_address_virtual(src_va),
size);
}

// Push SET_DST_PHYS mode if needed and return LAUNCH_DMA_DST_TYPE flags
static NvU32 memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
static NvU32 maxwell_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
{
if (dst.is_virtual)
return HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
@@ -290,12 +266,12 @@ static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size,
NvU32 launch_dma_dst_type;
NvU32 launch_dma_plc_mode;

UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
"Memset validation failed in channel %s, GPU %s",
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, memset_element_size),
"Memset validation failed in channel %s, GPU %s.\n",
push->channel->name,
uvm_gpu_name(gpu));

launch_dma_dst_type = memset_push_phys_mode(push, dst);
launch_dma_dst_type = maxwell_memset_push_phys_mode(push, dst);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
@@ -322,7 +298,7 @@ static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size,

dst.address += memset_this_time * memset_element_size;
size -= memset_this_time;
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
} while (size > 0);

maxwell_membar_after_transfer(push);
@@ -373,5 +349,6 @@ void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64

void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size)
{
uvm_hal_maxwell_ce_memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
}

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -455,7 +455,7 @@ static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
//
// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free
static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, struct mm_struct *mm, gfp_t gfp_flags)
static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
{
size_t i;
NV_STATUS status;
@@ -500,7 +500,7 @@ error:

// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free
static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, struct mm_struct *mm, gfp_t gfp_flags)
static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
{
size_t i;
int order;
@@ -523,7 +523,7 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, struct mm_struct *mm, g

// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_protected)
{
NV_STATUS status;

@@ -559,7 +559,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
return NV_OK;
}

static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_protected)
{
if (uvm_mem_is_sysmem(mem)) {
gfp_t gfp_flags;
@@ -573,23 +573,15 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer

uvm_memcg_context_start(&memcg_context, mm);
if (uvm_mem_is_sysmem_dma(mem))
status = mem_alloc_sysmem_dma_chunks(mem, mm, gfp_flags);
status = mem_alloc_sysmem_dma_chunks(mem, gfp_flags);
else
status = mem_alloc_sysmem_chunks(mem, mm, gfp_flags);
status = mem_alloc_sysmem_chunks(mem, gfp_flags);

uvm_memcg_context_end(&memcg_context);
return status;
}

return mem_alloc_vidmem_chunks(mem, zero);
}

static const char *mem_physical_source(uvm_mem_t *mem)
{
if (uvm_mem_is_vidmem(mem))
return uvm_gpu_name(mem->backing_gpu);

return "CPU";
return mem_alloc_vidmem_chunks(mem, zero, is_protected);
}

NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_global_processor_mask_t *mask)
@@ -617,7 +609,9 @@ NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_global_processor_mask_t *
NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_out)
{
NV_STATUS status;
NvU64 physical_size;
uvm_mem_t *mem = NULL;
bool is_protected = false;

UVM_ASSERT(params->size > 0);

@@ -636,10 +630,10 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou

UVM_ASSERT(mem->chunk_size > 0);

mem->physical_allocation_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
mem->chunks_count = mem->physical_allocation_size / mem->chunk_size;
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
mem->chunks_count = physical_size / mem->chunk_size;

status = mem_alloc_chunks(mem, params->mm, params->zero);
status = mem_alloc_chunks(mem, params->mm, params->zero, is_protected);
if (status != NV_OK)
goto error;

@@ -664,7 +658,7 @@ static NV_STATUS mem_init_user_mapping(uvm_mem_t *mem, uvm_va_space_t *user_va_s
}

UVM_ASSERT(IS_ALIGNED((NvU64)user_addr, mem->chunk_size));
UVM_ASSERT(mem->physical_allocation_size == mem->size);
UVM_ASSERT(uvm_mem_physical_size(mem) == mem->size);

mem->user = uvm_kvmalloc_zero(sizeof(*mem->user));
if (mem->user == NULL)
@@ -690,7 +684,7 @@ static void mem_deinit_user_mapping(uvm_mem_t *mem)

static NvU64 reserved_gpu_va(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
UVM_ASSERT(mem->kernel.range_alloc.aligned_start + mem->physical_allocation_size < gpu->parent->uvm_mem_va_size);
UVM_ASSERT(mem->kernel.range_alloc.aligned_start + uvm_mem_physical_size(mem) < gpu->parent->uvm_mem_va_size);

return gpu->parent->uvm_mem_va_base + mem->kernel.range_alloc.aligned_start;
}
@@ -708,7 +702,7 @@ static struct page *mem_cpu_page(uvm_mem_t *mem, NvU64 offset)
static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
{
struct page **pages = mem->sysmem.pages;
size_t num_pages = mem->physical_allocation_size / PAGE_SIZE;
size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
pgprot_t prot = PAGE_KERNEL;

UVM_ASSERT(uvm_mem_is_sysmem(mem));
@@ -742,7 +736,7 @@ static NV_STATUS mem_map_cpu_to_vidmem_kernel(uvm_mem_t *mem)
{
struct page **pages;
size_t num_chunk_pages = mem->chunk_size / PAGE_SIZE;
size_t num_pages = mem->physical_allocation_size / PAGE_SIZE;
size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
size_t page_index;
size_t chunk_index;

@@ -797,7 +791,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_user(uvm_mem_t *mem, struct vm_area_struc
// compound pages in order to be able to use vm_insert_page on them. This
// is not currently being exercised because the only allocations using this
// are semaphore pools (which typically use a single page).
for (offset = 0; offset < mem->physical_allocation_size; offset += PAGE_SIZE) {
for (offset = 0; offset < uvm_mem_physical_size(mem); offset += PAGE_SIZE) {
int ret = vm_insert_page(vma, (unsigned long)mem->user->addr + offset, mem_cpu_page(mem, offset));
if (ret) {
UVM_ASSERT_MSG(ret == -ENOMEM, "ret: %d\n", ret);
@@ -809,7 +803,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_user(uvm_mem_t *mem, struct vm_area_struc
return NV_OK;

error:
unmap_mapping_range(&mem->user->va_space->mapping, (size_t)mem->user->addr, mem->physical_allocation_size, 1);
unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
return status;
}

@@ -818,7 +812,7 @@ void uvm_mem_unmap_cpu_user(uvm_mem_t *mem)
if (!uvm_mem_mapped_on_cpu_user(mem))
return;

unmap_mapping_range(&mem->user->va_space->mapping, (size_t)mem->user->addr, mem->physical_allocation_size, 1);
unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
mem_clear_mapped_on_cpu_user(mem);
mem_deinit_user_mapping(mem);
}
@@ -893,7 +887,7 @@ static void sysmem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
// partial map_gpu_sysmem_iommu() operation.
break;
}
uvm_gpu_unmap_cpu_pages(gpu, dma_addrs[i], mem->chunk_size);
uvm_gpu_unmap_cpu_pages(gpu->parent, dma_addrs[i], mem->chunk_size);
dma_addrs[i] = 0;
}

@@ -914,7 +908,7 @@ static NV_STATUS sysmem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
return status;

for (i = 0; i < mem->chunks_count; ++i) {
status = uvm_gpu_map_cpu_pages(gpu,
status = uvm_gpu_map_cpu_pages(gpu->parent,
mem->sysmem.pages[i],
mem->chunk_size,
&mem->sysmem.dma_addrs[uvm_global_id_gpu_index(gpu->global_id)][i]);
@@ -958,21 +952,17 @@ static uvm_gpu_phys_address_t mem_gpu_physical_sysmem(uvm_mem_t *mem, uvm_gpu_t
return uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr + offset % mem->chunk_size);
}

static bool mem_check_range(uvm_mem_t *mem, NvU64 offset, NvU64 size)
bool uvm_mem_is_physically_contiguous(uvm_mem_t *mem, NvU64 offset, NvU64 size)
{
UVM_ASSERT(size != 0);
UVM_ASSERT_MSG(UVM_ALIGN_DOWN(offset, mem->chunk_size) == UVM_ALIGN_DOWN(offset + size - 1, mem->chunk_size),
"offset %llu size %llu page_size %u\n",
offset,
size,
mem->chunk_size);
UVM_ASSERT_MSG(offset / mem->chunk_size < mem->chunks_count, "offset %llu\n", offset);
return true;
UVM_ASSERT((offset + size) <= uvm_mem_physical_size(mem));

return UVM_ALIGN_DOWN(offset, mem->chunk_size) == UVM_ALIGN_DOWN(offset + size - 1, mem->chunk_size);
}

uvm_gpu_phys_address_t uvm_mem_gpu_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
{
UVM_ASSERT(mem_check_range(mem, offset, size));
UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));

if (uvm_mem_is_vidmem(mem)) {
UVM_ASSERT(uvm_mem_is_local_vidmem(mem, gpu));
@@ -989,7 +979,7 @@ uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_
size_t chunk_offset;
uvm_gpu_chunk_t *chunk;

UVM_ASSERT(mem_check_range(mem, offset, size));
UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));

if (uvm_mem_is_sysmem(mem) || uvm_mem_is_local_vidmem(mem, accessing_gpu))
return uvm_mem_gpu_address_physical(mem, accessing_gpu, offset, size);
@@ -1023,13 +1013,8 @@ static NvU64 mem_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset,

static void mem_unmap_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_table_range_vec_t **range_vec)
{
NV_STATUS status;
uvm_membar_t tlb_membar = UVM_MEMBAR_SYS;

if (uvm_mem_is_local_vidmem(mem, gpu))
tlb_membar = UVM_MEMBAR_GPU;

status = uvm_page_table_range_vec_clear_ptes(*range_vec, tlb_membar);
uvm_membar_t tlb_membar = uvm_hal_downgrade_membar_type(gpu, uvm_mem_is_local_vidmem(mem, gpu));
NV_STATUS status = uvm_page_table_range_vec_clear_ptes(*range_vec, tlb_membar);
if (status != NV_OK)
UVM_ERR_PRINT("Clearing PTEs failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));

@@ -1053,22 +1038,19 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
.attrs = attrs
};

if (!uvm_gpu_can_address(gpu, gpu_va, mem->size))
return NV_ERR_OUT_OF_RANGE;

page_size = mem_pick_gpu_page_size(mem, gpu, tree);
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);

status = uvm_page_table_range_vec_create(tree,
gpu_va,
mem->physical_allocation_size,
uvm_mem_physical_size(mem),
page_size,
pmm_flags,
range_vec);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init page mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
gpu_va,
gpu_va + mem->physical_allocation_size,
gpu_va + uvm_mem_physical_size(mem),
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
@@ -1078,7 +1060,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to write PTEs for mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
gpu_va,
gpu_va + mem->physical_allocation_size,
gpu_va + uvm_mem_physical_size(mem),
nvstatusToString(status),
uvm_gpu_name(gpu));
goto error;
@@ -1097,7 +1079,7 @@ static NV_STATUS mem_init_gpu_kernel_range(uvm_mem_t *mem)
return NV_OK;

return uvm_range_allocator_alloc(&g_free_ranges,
mem->physical_allocation_size,
uvm_mem_physical_size(mem),
mem->chunk_size,
&mem->kernel.range_alloc);
}
@@ -1138,7 +1120,7 @@ NV_STATUS uvm_mem_map_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
if (status != NV_OK)
return status;

gpu_va = uvm_parent_gpu_canonical_address(gpu->parent, reserved_gpu_va(mem, gpu));
gpu_va = reserved_gpu_va(mem, gpu);
range_vec = &mem->kernel.range_vecs[uvm_global_id_gpu_index(gpu->global_id)];

status = mem_map_gpu(mem, gpu, gpu_va, &gpu->address_space_tree, &attrs, range_vec);
@@ -1164,6 +1146,7 @@ NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
NV_STATUS status;
uvm_gpu_va_space_t *gpu_va_space;
uvm_page_table_range_vec_t **range_vec;
NvU64 gpu_va;

UVM_ASSERT(mem_can_be_mapped_on_gpu_user(mem, gpu));
uvm_assert_rwsem_locked(&user_va_space->lock);
@@ -1171,6 +1154,10 @@ NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
if (uvm_mem_mapped_on_gpu_user(mem, gpu))
return NV_OK;

gpu_va = (NvU64)user_addr;
if (!uvm_gpu_can_address(gpu, gpu_va, mem->size))
return NV_ERR_OUT_OF_RANGE;

status = uvm_mem_map_gpu_phys(mem, gpu);
if (status != NV_OK)
return status;
@@ -1182,7 +1169,7 @@ NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
gpu_va_space = uvm_gpu_va_space_get(mem->user->va_space, gpu);
range_vec = &mem->user->range_vecs[uvm_global_id_gpu_index(gpu->global_id)];

status = mem_map_gpu(mem, gpu, (NvU64)mem->user->addr, &gpu_va_space->page_tables, attrs, range_vec);
status = mem_map_gpu(mem, gpu, gpu_va, &gpu_va_space->page_tables, attrs, range_vec);
if (status != NV_OK)
goto cleanup;

@@ -163,9 +163,6 @@ struct uvm_mem_struct

uvm_gpu_t *dma_owner;

// Size of the physical chunks.
NvU32 chunk_size;

union
{
struct
@@ -179,6 +176,8 @@ struct uvm_mem_struct
//
// There is no equivalent mask for vidmem, because only the backing
// GPU can physical access the memory
//
// TODO: Bug 3723779: Share DMA mappings within a single parent GPU
uvm_global_processor_mask_t mapped_on_phys;

struct page **pages;
@@ -192,12 +191,12 @@ struct uvm_mem_struct
// Count of chunks (vidmem) or CPU pages (sysmem) above
size_t chunks_count;

// Size of each physical chunk (vidmem) or CPU page (sysmem)
NvU32 chunk_size;

// Size of the allocation
NvU64 size;

// Size of the physical allocation backing
NvU64 physical_allocation_size;

uvm_mem_user_mapping_t *user;

// Information specific to allocations mapped in UVM internal VA space.
@@ -233,6 +232,20 @@ NV_STATUS uvm_mem_translate_gpu_attributes(const UvmGpuMappingAttributes *attrs,

uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu);

// Size of all the physical allocations backing the given memory.
static inline NvU64 uvm_mem_physical_size(const uvm_mem_t *mem)
{
NvU64 physical_size = mem->chunks_count * mem->chunk_size;

UVM_ASSERT(mem->size <= physical_size);

return physical_size;
}

// Returns true if the memory is physically contiguous in the
// [offset, offset + size) interval.
bool uvm_mem_is_physically_contiguous(uvm_mem_t *mem, NvU64 offset, NvU64 size);
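
The contiguity test declared above reduces to "both ends of [offset, offset + size) land in the same chunk". A standalone illustration of that rule (plain C, not driver code; the chunk size and offsets are arbitrary):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Round down to a power-of-two alignment, as UVM_ALIGN_DOWN does.
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

static bool is_physically_contiguous(uint64_t offset, uint64_t size, uint64_t chunk_size)
{
    // Contiguous exactly when the range does not cross a chunk boundary.
    return ALIGN_DOWN(offset, chunk_size) == ALIGN_DOWN(offset + size - 1, chunk_size);
}

int main(void)
{
    const uint64_t chunk = 64 * 1024;
    printf("%d\n", is_physically_contiguous(0x1000, 0x2000, chunk));  // 1: stays inside chunk 0
    printf("%d\n", is_physically_contiguous(0xF000, 0x2000, chunk));  // 0: crosses into chunk 1
    return 0;
}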

// Allocate memory according to the given allocation parameters.
//
// In the case of sysmem, the memory is immediately physically accessible from

@@ -62,7 +62,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)

verif_size = UVM_ALIGN_UP(verif_size, sizeof(*sys_verif));

UVM_ASSERT(mem->physical_allocation_size >= verif_size);
UVM_ASSERT(uvm_mem_physical_size(mem) >= verif_size);
UVM_ASSERT(verif_size >= sizeof(*sys_verif));

TEST_NV_CHECK_GOTO(__alloc_map_sysmem(verif_size, gpu, &sys_mem), done);
@@ -185,7 +185,7 @@ static NV_STATUS test_map_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu)

gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
TEST_CHECK_RET(gpu_va >= gpu->parent->uvm_mem_va_base);
TEST_CHECK_RET(gpu_va + mem->physical_allocation_size <= gpu->parent->uvm_mem_va_base + gpu->parent->uvm_mem_va_size);
TEST_CHECK_RET(gpu_va + uvm_mem_physical_size(mem) <= gpu->parent->uvm_mem_va_base + gpu->parent->uvm_mem_va_size);

// Mapping if already mapped is OK
TEST_NV_CHECK_RET(uvm_mem_map_gpu_kernel(mem, gpu));
@@ -370,6 +370,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
static const int max_supported_page_sizes = 4 + 1;
int i;

gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);

// +1 for the CPU

@@ -86,7 +86,8 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
// Only map those pages that are not already mapped on destination
for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
UVM_ASSERT(prot != UVM_PROT_NONE);
if (prot == UVM_PROT_NONE)
continue;

if (va_block_context->mask_by_prot[prot - 1].count++ == 0)
uvm_page_mask_zero(&va_block_context->mask_by_prot[prot - 1].page_mask);
@@ -206,26 +207,39 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status = NV_OK;

uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, region));

if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
status = uvm_va_block_make_resident_read_duplicate(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
NULL,
NULL,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_va_block_migrate_locked(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
}
else {
status = uvm_va_block_make_resident(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
NULL,
NULL,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);

if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
status = uvm_va_block_make_resident_read_duplicate(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
NULL,
NULL,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
}
else {
status = uvm_va_block_make_resident(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
NULL,
NULL,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
}
}

if (status == NV_OK && mode == UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP) {
@@ -314,7 +328,8 @@ static bool migration_should_do_cpu_preunmap(uvm_va_space_t *va_space,
// read-duplication is enabled in the VA range. This is because, when migrating
// read-duplicated VA blocks, the source processor doesn't need to be unmapped
// (though it may need write access revoked).
static bool va_range_should_do_cpu_preunmap(uvm_va_policy_t *policy, uvm_va_space_t *va_space)
static bool va_range_should_do_cpu_preunmap(const uvm_va_policy_t *policy,
uvm_va_space_t *va_space)
{
return !uvm_va_policy_is_read_duplicate(policy, va_space);
}
@@ -404,7 +419,7 @@ static void preunmap_multi_block(uvm_va_range_t *va_range,
}

if (num_unmap_pages > 0)
unmap_mapping_range(&va_range->va_space->mapping, start, end - start + 1, 1);
unmap_mapping_range(va_range->va_space->mapping, start, end - start + 1, 1);
}

static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
@@ -466,6 +481,8 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
{
NvU64 preunmap_range_start = start;

UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_range));

should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(va_block_context->policy,
va_range->va_space);

@@ -520,6 +537,17 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
NV_STATUS status = NV_OK;
bool skipped_migrate = false;

if (!first_va_range) {
// For HMM, we iterate over va_blocks since there is no va_range.
return uvm_hmm_migrate_ranges(va_space,
va_block_context,
base,
length,
dest_id,
mode,
out_tracker);
}

UVM_ASSERT(first_va_range == uvm_va_space_iter_first(va_space, base, base));

va_range_last = NULL;
@@ -590,10 +618,10 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
NvU64 length,
uvm_processor_id_t dest_id,
NvU32 migrate_flags,
uvm_va_range_t *first_va_range,
uvm_tracker_t *out_tracker)
{
NV_STATUS status = NV_OK;
uvm_va_range_t *first_va_range = uvm_va_space_iter_first(va_space, base, base);
uvm_va_block_context_t *va_block_context;
bool do_mappings;
bool do_two_passes;
@@ -602,9 +630,6 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,

uvm_assert_rwsem_locked(&va_space->lock);

if (!first_va_range || first_va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
return NV_ERR_INVALID_ADDRESS;

// If the GPU has its memory disabled, just skip the migration and let
// faults take care of things.
if (!uvm_va_space_processor_has_memory(va_space, dest_id))
@@ -612,6 +637,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,

if (mm)
uvm_assert_mmap_lock_locked(mm);

va_block_context = uvm_va_block_context_alloc(mm);
if (!va_block_context)
return NV_ERR_NO_MEMORY;
@@ -634,7 +660,9 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
// 1- Transfer all VA blocks (do not add mappings)
// 2- Go block by block reexecuting the transfer (in case someone moved it
// since the first pass), and adding the mappings.
is_single_block = is_migration_single_block(first_va_range, base, length);
//
// For HMM (!first_va_range), we always do a single pass.
is_single_block = !first_va_range || is_migration_single_block(first_va_range, base, length);
do_mappings = UVM_ID_IS_GPU(dest_id) || !(migrate_flags & UVM_MIGRATE_FLAG_SKIP_CPU_MAP);
do_two_passes = do_mappings && !is_single_block;

@@ -791,7 +819,7 @@ static NV_STATUS semaphore_release(NvU64 semaphore_address,
return semaphore_release_from_gpu(gpu, semaphore_pool, semaphore_address, semaphore_payload, tracker_ptr);
}

NV_STATUS uvm_migrate_init()
NV_STATUS uvm_migrate_init(void)
{
NV_STATUS status = uvm_migrate_pageable_init();
if (status != NV_OK)
@@ -817,7 +845,7 @@ NV_STATUS uvm_migrate_init()
return NV_OK;
}

void uvm_migrate_exit()
void uvm_migrate_exit(void)
{
uvm_migrate_pageable_exit();
}
@@ -850,6 +878,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)

if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
UVM_INFO_PRINT("TEMP\n");
return NV_ERR_INVALID_ARGUMENT;
}

@@ -912,6 +941,9 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
params->length,
(dest_gpu ? dest_gpu->id : UVM_ID_CPU),
params->flags,
uvm_va_space_iter_first(va_space,
params->base,
params->base),
tracker_ptr);
}
else if (status == NV_WARN_NOTHING_TO_DO) {
@@ -942,10 +974,8 @@ done:
// benchmarks to see if a two-pass approach would be faster (first
// pass pushes all GPU work asynchronously, second pass updates CPU
// mappings synchronously).
if (mm) {
if (mm)
uvm_up_read_mmap_lock_out_of_order(mm);
uvm_va_space_mm_or_current_release(va_space, mm);
}

if (tracker_ptr) {
// If requested, release semaphore
@@ -973,6 +1003,7 @@ done:
}

uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);

// If the migration is known to be complete, eagerly dispatch the migration
// events, instead of processing them on a later event flush. Note that an
@@ -1026,10 +1057,26 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
NvU64 start = rgr->node.start;
NvU64 length = rgr->node.end - rgr->node.start + 1;

if (gpu && !uvm_gpu_can_address(gpu, start, length))
if (gpu && !uvm_gpu_can_address(gpu, start, length)) {
status = NV_ERR_OUT_OF_RANGE;
else
status = uvm_migrate(va_space, mm, start, length, dest_id, migrate_flags, &local_tracker);
}
else {
uvm_va_range_t *first_va_range = uvm_va_space_iter_first(va_space, start, start);

if (!first_va_range || first_va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
status = NV_ERR_INVALID_ADDRESS;
goto done;
}

status = uvm_migrate(va_space,
mm,
start,
length,
dest_id,
migrate_flags,
first_va_range,
&local_tracker);
}

if (status != NV_OK)
goto done;
@@ -1043,13 +1090,12 @@ done:
// benchmarks to see if a two-pass approach would be faster (first
// pass pushes all GPU work asynchronously, second pass updates CPU
// mappings synchronously).
if (mm) {
if (mm)
uvm_up_read_mmap_lock_out_of_order(mm);
uvm_va_space_mm_or_current_release(va_space, mm);
}

tracker_status = uvm_tracker_wait_deinit(&local_tracker);
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);

// This API is synchronous, so wait for migrations to finish
uvm_tools_flush_events();

@@ -74,7 +74,7 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
}
else {
// Sysmem/Indirect Peer
NV_STATUS status = uvm_gpu_map_cpu_page(copying_gpu, page, &state->dma.addrs[page_index]);
NV_STATUS status = uvm_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);

if (status != NV_OK)
return status;
@@ -628,7 +628,7 @@ void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_stat
if (state->dma.num_pages > 0) {

for_each_set_bit(i, state->dma.page_mask, state->num_pages)
uvm_gpu_unmap_cpu_page(state->dma.addrs_gpus[i], state->dma.addrs[i]);
uvm_gpu_unmap_cpu_page(state->dma.addrs_gpus[i]->parent, state->dma.addrs[i]);
}

UVM_ASSERT(!bitmap_intersects(state->populate_pages_mask, state->allocation_failed_mask, state->num_pages));
@@ -1001,7 +1001,7 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
return status;
}

NV_STATUS uvm_migrate_pageable_init()
NV_STATUS uvm_migrate_pageable_init(void)
{
g_uvm_migrate_vma_state_cache = NV_KMEM_CACHE_CREATE("migrate_vma_state_t", migrate_vma_state_t);
if (!g_uvm_migrate_vma_state_cache)
@@ -1010,7 +1010,7 @@ NV_STATUS uvm_migrate_pageable_init()
return NV_OK;
}

void uvm_migrate_pageable_exit()
void uvm_migrate_pageable_exit(void)
{
kmem_cache_destroy_safe(&g_uvm_migrate_vma_state_cache);
}

@@ -34,8 +34,8 @@ typedef struct
{
uvm_va_space_t *va_space;
struct mm_struct *mm;
const unsigned long start;
const unsigned long length;
unsigned long start;
unsigned long length;
uvm_processor_id_t dst_id;

// dst_node_id may be clobbered by uvm_migrate_pageable().

Some files were not shown because too many files have changed in this diff.