Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
Synced 2026-01-27 11:39:46 +00:00
Compare commits

6 commits:

- c7d7d95a5a
- dfd3023173
- 17546dbdda
- 18b7303c54
- eb5c7665a1
- 6dd092ddb7
CHANGELOG.md — 42 changed lines
@@ -1,15 +1,53 @@
# Changelog

## Release 535 Entries

### [535.43.11] 2023-10-05

### [535.43.10] 2023-09-27

### [535.43.09] 2023-09-01

### [535.43.08] 2023-08-17

### [535.43.02] 2023-05-30

#### Fixed

- Fixed console restore with traditional VGA consoles.

#### Added

- Added support for Run Time D3 (RTD3) on Ampere and later GPUs.
- Added support for G-Sync on desktop GPUs.

## Release 530 Entries

### [530.41.03] 2023-03-23

### [530.30.02] 2023-02-28

#### Changed

- GSP firmware is now distributed as `gsp_tu10x.bin` and `gsp_ga10x.bin` to better reflect the GPU architectures supported by each firmware file in this release.
- The .run installer will continue to install firmware to /lib/firmware/nvidia/<version> and the nvidia.ko kernel module will load the appropriate firmware for each GPU at runtime.

#### Fixed

- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly

#### Added

- Support for power management features like Suspend, Hibernate and Resume.

## Release 525 Entries

### [525.116.04] 2023-05-09

### [525.116.03] 2023-04-25

### [525.105.17] 2023-03-30

### [525.89.02] 2023-02-08

### [525.85.12] 2023-01-30
README.md — 69 changed lines
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 530.30.02.
version 535.43.11.

## How to Build

@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
530.30.02 driver release. This can be achieved by installing
535.43.11 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -162,12 +162,25 @@ for the target kernel.

- `src/nvidia/` The OS-agnostic code for nvidia.ko
- `src/nvidia-modeset/` The OS-agnostic code for nvidia-modeset.ko
- `src/common/` Utility code used by one or more of nvidia.ko and nvidia-modeset.ko
- `nouveau/` Tools for integration with the Nouveau device driver

## Nouveau device driver integration

The Python script in the 'nouveau' directory is used to extract some of the
firmware binary images (and related data) encoded in the source code and
store them as distinct files. These files are used by the Nouveau device
driver to load and communicate with the GSP firmware.

The layout of the binary files is described in nouveau_firmware_layout.ods,
which is an OpenDocument Spreadsheet file, compatible with most spreadsheet
software applications.

## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 530.30.02 release,
(see the table below). However, in the 535.43.11 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/530.30.02/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.11/README/kernel_open.html

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -651,6 +664,8 @@ Subsystem Device ID.
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A30 | 20B7 10DE 1804 |
|
||||
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
|
||||
@@ -662,6 +677,10 @@ Subsystem Device ID.
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
|
||||
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
|
||||
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
|
||||
| NVIDIA A800 40GB Active | 20F6 1028 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 103C 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
@@ -720,9 +739,15 @@ Subsystem Device ID.
|
||||
| NVIDIA A10 | 2236 10DE 1482 |
|
||||
| NVIDIA A10G | 2237 10DE 152F |
|
||||
| NVIDIA A10M | 2238 10DE 1677 |
|
||||
| NVIDIA H100 NVL | 2321 10DE 1839 |
|
||||
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
|
||||
| NVIDIA H800 | 2324 10DE 17A6 |
|
||||
| NVIDIA H800 | 2324 10DE 17A8 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
@@ -750,6 +775,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
|
||||
| NVIDIA RTX A4500 Laptop GPU | 24BA |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
|
||||
| NVIDIA GeForce RTX 3060 | 24C7 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
|
||||
@@ -795,6 +821,8 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
|
||||
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BB |
|
||||
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BD |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
|
||||
@@ -808,16 +836,49 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA L40S | 26B9 10DE 1851 |
|
||||
| NVIDIA L40S | 26B9 10DE 18CF |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
|
||||
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
|
||||
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2803 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2805 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 | 2882 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
|
||||
|
||||
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"530.30.02\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.11\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
@@ -91,7 +91,8 @@ ifeq ($(ARCH),arm64)
|
||||
endif
|
||||
|
||||
ifeq ($(NV_BUILD_TYPE),debug)
|
||||
EXTRA_CFLAGS += -g -gsplit-dwarf
|
||||
EXTRA_CFLAGS += -g
|
||||
EXTRA_CFLAGS += $(call cc-option,-gsplit-dwarf,)
|
||||
endif
|
||||
|
||||
EXTRA_CFLAGS += -ffreestanding
|
||||
@@ -216,8 +217,10 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_auth.h \
|
||||
drm/drm_gem.h \
|
||||
drm/drm_crtc.h \
|
||||
drm/drm_color_mgmt.h \
|
||||
drm/drm_atomic.h \
|
||||
drm/drm_atomic_helper.h \
|
||||
drm/drm_atomic_state_helper.h \
|
||||
drm/drm_encoder.h \
|
||||
drm/drm_atomic_uapi.h \
|
||||
drm/drm_drv.h \
|
||||
@@ -241,7 +244,6 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/efi.h \
|
||||
linux/kconfig.h \
|
||||
linux/platform/tegra/mc_utils.h \
|
||||
linux/semaphore.h \
|
||||
linux/printk.h \
|
||||
linux/ratelimit.h \
|
||||
linux/prio_tree.h \
|
||||
@@ -273,6 +275,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
asm/opal-api.h \
|
||||
sound/hdaudio.h \
|
||||
asm/pgtable_types.h \
|
||||
asm/page.h \
|
||||
linux/stringhash.h \
|
||||
linux/dma-map-ops.h \
|
||||
rdma/peer_mem.h \
|
||||
@@ -296,8 +299,11 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
linux/mdev.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h
|
||||
soc/tegra/bpmp.h \
|
||||
linux/cc_platform.h \
|
||||
asm/cpufeature.h
|
||||
|
||||
# Filename to store the define for the header in $(1); this is only consumed by
|
||||
# the rule below that concatenates all of these together.
|
||||
|
||||
kernel-open/common/inc/nv-firmware-registry.h — new file, 83 lines
@@ -0,0 +1,83 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

//
// This file holds GPU firmware related registry key definitions that are
// shared between Windows and Unix
//

#ifndef NV_FIRMWARE_REGISTRY_H
#define NV_FIRMWARE_REGISTRY_H

//
// Registry key that when enabled, will enable use of GPU firmware.
//
// Possible mode values:
//  0 - Do not enable GPU firmware
//  1 - Enable GPU firmware
//  2 - (Default) Use the default enablement policy for GPU firmware
//
// Setting this to anything other than 2 will alter driver firmware-
// enablement policies, possibly disabling GPU firmware where it would
// have otherwise been enabled by default.
//
// Policy bits:
//
// POLICY_ALLOW_FALLBACK:
//  As the normal behavior is to fail GPU initialization if this registry
//  entry is set in such a way that results in an invalid configuration, if
//  instead the user would like the driver to automatically try to fallback
//  to initializing the failing GPU with firmware disabled, then this bit can
//  be set (ex: 0x11 means try to enable GPU firmware but fall back if needed).
//  Note that this can result in a mixed mode configuration (ex: GPU0 has
//  firmware enabled, but GPU1 does not).
//
#define NV_REG_STR_ENABLE_GPU_FIRMWARE  "EnableGpuFirmware"

#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_MASK       0x0000000F
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DISABLED   0x00000000
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_ENABLED    0x00000001
#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DEFAULT    0x00000002

#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_MASK            0x000000F0
#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_ALLOW_FALLBACK  0x00000010

#define NV_REG_ENABLE_GPU_FIRMWARE_DEFAULT_VALUE   0x00000012

//
// Registry key that when enabled, will send GPU firmware logs
// to the system log, when possible.
//
// Possible values:
//  0 - Do not send GPU firmware logs to the system log
//  1 - Enable sending of GPU firmware logs to the system log
//  2 - (Default) Enable sending of GPU firmware logs to the system log for
//      the debug kernel driver build only
//
#define NV_REG_STR_ENABLE_GPU_FIRMWARE_LOGS  "EnableGpuFirmwareLogs"

#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_DISABLE          0x00000000
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE           0x00000001
#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE_ON_DEBUG  0x00000002

#endif  // NV_FIRMWARE_REGISTRY_H
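The mode/policy split documented in this new header can be made concrete with a small decoding sketch. This is an illustrative fragment, not part of the driver; the helper name and the printf reporting are assumptions, while the masks and values are copied from the definitions above (0x11 selects MODE_ENABLED plus POLICY_ALLOW_FALLBACK; the documented default 0x12 selects MODE_DEFAULT plus POLICY_ALLOW_FALLBACK).

```c
/*
 * Illustrative only: decode an "EnableGpuFirmware" registry value into its
 * mode and policy fields using the masks from nv-firmware-registry.h.
 * decode_enable_gpu_firmware() is a hypothetical helper, not driver code.
 */
#include <stdio.h>

#define MODE_MASK              0x0000000F  /* NV_REG_ENABLE_GPU_FIRMWARE_MODE_MASK */
#define MODE_DISABLED          0x00000000
#define MODE_ENABLED           0x00000001
#define POLICY_ALLOW_FALLBACK  0x00000010  /* NV_REG_ENABLE_GPU_FIRMWARE_POLICY_ALLOW_FALLBACK */

static void decode_enable_gpu_firmware(unsigned int value)
{
    unsigned int mode = value & MODE_MASK;

    printf("value=0x%02X: mode=%s, allow_fallback=%s\n",
           value,
           mode == MODE_ENABLED  ? "enabled"  :
           mode == MODE_DISABLED ? "disabled" : "default",
           (value & POLICY_ALLOW_FALLBACK) ? "yes" : "no");
}

int main(void)
{
    decode_enable_gpu_firmware(0x11); /* try GSP firmware, fall back if needed */
    decode_enable_gpu_firmware(0x12); /* documented default value */
    return 0;
}
```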
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -81,11 +81,11 @@ static inline const char *nv_firmware_path(
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
@@ -100,11 +100,11 @@ static inline const char *nv_firmware_path(
|
||||
{
|
||||
switch (fw_chip_family)
|
||||
{
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
|
||||
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");
|
||||
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
|
||||
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
|
||||
|
||||
@@ -41,6 +41,7 @@ typedef enum _HYPERVISOR_TYPE
|
||||
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
|
||||
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
|
||||
#define CMD_VGPU_VFIO_PRESENT 3
|
||||
#define CMD_VFIO_PCI_CORE_PRESENT 4
|
||||
|
||||
#define MAX_VF_COUNT_PER_GPU 64
|
||||
|
||||
@@ -57,6 +58,7 @@ typedef struct
|
||||
void *waitQueue;
|
||||
void *nv;
|
||||
NvU32 *vgpuTypeIds;
|
||||
NvU8 **vgpuNames;
|
||||
NvU32 numVgpuTypes;
|
||||
NvU32 domain;
|
||||
NvU8 bus;
|
||||
|
||||
@@ -62,6 +62,7 @@ typedef struct nv_ioctl_numa_info
|
||||
uint64_t memblock_size __aligned(8);
|
||||
uint64_t numa_mem_addr __aligned(8);
|
||||
uint64_t numa_mem_size __aligned(8);
|
||||
uint8_t use_auto_online;
|
||||
nv_offline_addresses_t offline_addresses __aligned(8);
|
||||
} nv_ioctl_numa_info_t;
|
||||
|
||||
|
||||
@@ -28,15 +28,10 @@
|
||||
#include <linux/list.h> // list
|
||||
#include <linux/sched.h> // task_struct
|
||||
#include <linux/numa.h> // NUMA_NO_NODE
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
#include "conftest.h"
|
||||
|
||||
#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
|
||||
#include <linux/semaphore.h>
|
||||
#else
|
||||
#include <asm/semaphore.h>
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// nv_kthread_q:
|
||||
//
|
||||
|
||||
@@ -211,6 +211,7 @@
|
||||
#include <linux/highmem.h>
|
||||
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/memory.h>
|
||||
|
||||
#include <linux/workqueue.h> /* workqueue */
|
||||
#include "nv-kthread-q.h" /* kthread based queue */
|
||||
@@ -510,7 +511,11 @@ static inline void nv_vfree(void *ptr, NvU64 size)
|
||||
|
||||
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
|
||||
void *ptr = ioremap_driver_hardened(phys, size);
|
||||
#else
|
||||
void *ptr = ioremap(phys, size);
|
||||
#endif
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
@@ -523,11 +528,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
|
||||
|
||||
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if defined(NV_IOREMAP_CACHE_PRESENT)
|
||||
void *ptr = ioremap_cache(phys, size);
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
void *ptr = NULL;
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
|
||||
ptr = ioremap_cache_shared(phys, size);
|
||||
#elif defined(NV_IOREMAP_CACHE_PRESENT)
|
||||
ptr = ioremap_cache(phys, size);
|
||||
#elif defined(NVCPU_PPC64LE)
|
||||
//
|
||||
// ioremap_cache() has been only implemented correctly for ppc64le with
|
||||
@@ -542,25 +547,32 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
|
||||
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
|
||||
// support on power.
|
||||
//
|
||||
void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
|
||||
#else
|
||||
return nv_ioremap(phys, size);
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if defined(NV_IOREMAP_WC_PRESENT)
|
||||
void *ptr = ioremap_wc(phys, size);
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
void *ptr = NULL;
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
|
||||
ptr = ioremap_driver_hardened_wc(phys, size);
|
||||
#elif defined(NV_IOREMAP_WC_PRESENT)
|
||||
ptr = ioremap_wc(phys, size);
|
||||
#else
|
||||
return nv_ioremap_nocache(phys, size);
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_iounmap(void *ptr, NvU64 size)
|
||||
@@ -633,24 +645,24 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
free_pages(ptr, order); \
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_sme_clr(pgprot_t prot)
|
||||
{
|
||||
#if defined(__sme_clr)
|
||||
return __pgprot(__sme_clr(pgprot_val(prot)));
|
||||
#else
|
||||
return prot;
|
||||
#endif // __sme_clr
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
|
||||
{
|
||||
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
|
||||
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
|
||||
/*
|
||||
* When AMD memory encryption is enabled, device memory mappings with the
|
||||
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
|
||||
*
|
||||
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
|
||||
*/
|
||||
#if defined(NV_CC_MKDEC_PRESENT)
|
||||
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
|
||||
#else
|
||||
prot = pgprot_decrypted(prot);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return prot;
|
||||
#if defined(pgprot_decrypted)
|
||||
return pgprot_decrypted(prot);
|
||||
#else
|
||||
return nv_sme_clr(prot);
|
||||
#endif // pgprot_decrypted
|
||||
}
|
||||
|
||||
#if defined(PAGE_KERNEL_NOENC)
|
||||
@@ -1310,7 +1322,7 @@ nv_dma_maps_swiotlb(struct device *dev)
|
||||
* SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
|
||||
* is active in all cases.
|
||||
*/
|
||||
if (os_sev_enabled)
|
||||
if (os_cc_enabled)
|
||||
swiotlb_in_use = NV_TRUE;
|
||||
#endif
|
||||
|
||||
@@ -1438,6 +1450,17 @@ struct nv_dma_device {
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
/* Properties of the coherent link */
typedef struct coherent_link_info_s {
    /* Physical address of the GPU memory in the SoC AMAP. In a bare-metal OS
     * environment this is the System Physical Address (SPA); in a virtualized
     * OS environment it is the Intermediate Physical Address (IPA). */
    NvU64 gpu_mem_pa;
    /* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
     * available for adding GPU memory to the kernel as system RAM */
    DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
} coherent_link_info_t;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
@@ -1471,6 +1494,13 @@ typedef struct nv_linux_state_s {
|
||||
/* IBM-NPU info associated with this GPU */
|
||||
nv_ibmnpu_info_t *npu;
|
||||
|
||||
/* coherent link information */
|
||||
coherent_link_info_t coherent_link_info;
|
||||
|
||||
/* Dedicated queue to be used for removing FB memory which is onlined
|
||||
* to kernel as a NUMA node. Refer Bug : 3879845*/
|
||||
nv_kthread_q_t remove_numa_memory_q;
|
||||
|
||||
/* NUMA node information for the platforms where GPU memory is presented
|
||||
* as a NUMA node to the kernel */
|
||||
struct {
|
||||
@@ -1481,6 +1511,7 @@ typedef struct nv_linux_state_s {
|
||||
/* NUMA online/offline status for platforms that support GPU memory as
|
||||
* NUMA node */
|
||||
atomic_t status;
|
||||
NvBool use_auto_online;
|
||||
} numa_info;
|
||||
|
||||
nvidia_stack_t *sp[NV_DEV_STACK_COUNT];
|
||||
@@ -1946,6 +1977,36 @@ static inline int nv_set_numa_status(nv_linux_state_t *nvl, int status)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
|
||||
{
|
||||
return nvl->numa_info.use_auto_online;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
NvU64 base;
|
||||
NvU64 size;
|
||||
NvU32 nodeId;
|
||||
int ret;
|
||||
} remove_numa_memory_info_t;
|
||||
|
||||
static void offline_numa_memory_callback
|
||||
(
|
||||
void *args
|
||||
)
|
||||
{
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
|
||||
pNumaInfo->base,
|
||||
pNumaInfo->size);
|
||||
#else
|
||||
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
|
||||
pNumaInfo->size);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_NUMA_STATUS_DISABLED = 0,
|
||||
|
||||
@@ -29,17 +29,12 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/sched.h> /* signal_pending, cond_resched */
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
|
||||
#include <linux/sched/signal.h> /* signal_pending for kernels >= 4.11 */
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
|
||||
#include <linux/semaphore.h>
|
||||
#else
|
||||
#include <asm/semaphore.h>
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
typedef raw_spinlock_t nv_spinlock_t;
|
||||
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
|
||||
@@ -62,20 +57,7 @@ typedef spinlock_t nv_spinlock_t;
|
||||
#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
|
||||
#endif
|
||||
|
||||
#if defined(NV_CONFIG_PREEMPT_RT)
|
||||
#define NV_INIT_SEMA(sema, val) sema_init(sema,val)
|
||||
#else
|
||||
#if !defined(__SEMAPHORE_INITIALIZER) && defined(__COMPAT_SEMAPHORE_INITIALIZER)
|
||||
#define __SEMAPHORE_INITIALIZER __COMPAT_SEMAPHORE_INITIALIZER
|
||||
#endif
|
||||
#define NV_INIT_SEMA(sema, val) \
|
||||
{ \
|
||||
struct semaphore __sema = \
|
||||
__SEMAPHORE_INITIALIZER(*(sema), val); \
|
||||
*(sema) = __sema; \
|
||||
}
|
||||
#endif
|
||||
#define NV_INIT_MUTEX(mutex) NV_INIT_SEMA(mutex, 1)
|
||||
#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)
|
||||
|
||||
static inline int nv_down_read_interruptible(struct rw_semaphore *lock)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -36,12 +36,21 @@ typedef int vm_fault_t;
|
||||
* pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
|
||||
*
|
||||
* Removed vmas parameter from pin_user_pages() by commit 40896a02751
|
||||
* ("mm/gup: remove vmas parameter from pin_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#if defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages, vmas) \
|
||||
pin_user_pages(start, nr_pages, gup_flags, pages)
|
||||
#endif // NV_PIN_USER_PAGES_HAS_ARGS_VMAS
|
||||
#define NV_UNPIN_USER_PAGE unpin_user_page
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
|
||||
@@ -64,11 +73,18 @@ typedef int vm_fault_t;
|
||||
* commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
|
||||
* replacement of the write and force parameters with gup_flags
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
|
||||
* ("mm/gup: remove unused vmas parameter from get_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
    #define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
        get_user_pages(start, nr_pages, flags, pages)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
    #define NV_GET_USER_PAGES get_user_pages
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS_VMAS)
    #define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
        get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
#else
@@ -81,13 +97,13 @@ typedef int vm_fault_t;
        int write = flags & FOLL_WRITE;
        int force = flags & FOLL_FORCE;

    #if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
    #if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS)
        return get_user_pages(start, nr_pages, write, force, pages, vmas);
    #else
        // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
        // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
        return get_user_pages(current, current->mm, start, nr_pages, write,
                              force, pages, vmas);
    #endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE
    #endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS
    }
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
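As a usage illustration for the compatibility chain above: callers always pass a vmas argument, and on kernels whose get_user_pages()/pin_user_pages() no longer take it, the wrapper macro simply drops it. The function below is a hypothetical, simplified sketch (it assumes the caller holds mmap_read_lock() and supplies a large-enough pages array); only the NV_PIN_USER_PAGES and NV_UNPIN_USER_PAGE names come from this header.

```c
/*
 * Hypothetical caller sketch, not driver code: pin a writable user buffer
 * through the compatibility macros so the same call site builds on kernels
 * with and without the vmas parameter. Assumes mmap_read_lock() is held.
 */
static long example_pin_user_buffer(unsigned long uaddr, unsigned long nr_pages,
                                    struct page **pages)
{
    long pinned;
    long i;

    /* The vmas argument is NULL; newer kernels drop it inside the macro. */
    pinned = NV_PIN_USER_PAGES(uaddr, nr_pages, FOLL_WRITE, pages, NULL);
    if (pinned < 0)
        return pinned;

    if ((unsigned long)pinned < nr_pages)
    {
        /* Partial pin: release what was pinned and report failure. */
        for (i = 0; i < pinned; i++)
            NV_UNPIN_USER_PAGE(pages[i]);
        return -EFAULT;
    }

    return pinned;
}
```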
|
||||
@@ -100,15 +116,22 @@ typedef int vm_fault_t;
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11). *
|
||||
*
|
||||
* Removed unused vmas parameter from pin_user_pages_remote() by commit
|
||||
* 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined (NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#else
|
||||
#elif defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_VMAS)
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
|
||||
@@ -135,22 +158,30 @@ typedef int vm_fault_t;
|
||||
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
|
||||
* all gup code") in v5.9-rc1 (2020-08-11).
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
|
||||
* ("mm/gup: remove vmas parameter from get_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
|
||||
#else
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
@@ -167,7 +198,7 @@ typedef int vm_fault_t;
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS)
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
@@ -185,7 +216,7 @@ typedef int vm_fault_t;
|
||||
#else
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
@@ -261,4 +292,22 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
    vm_flags_set(vma, flags);
#else
    vma->vm_flags |= flags;
#endif
}

static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
    vm_flags_clear(vma, flags);
#else
    vma->vm_flags &= ~flags;
#endif
}

#endif // __NV_MM_H__
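A brief usage illustration for the two helpers above: a driver mmap handler can adjust VMA flags through nv_vm_flags_set() so the same code compiles both before and after vm_area_struct::vm_flags became const upstream. The handler below is a hypothetical sketch, not the driver's actual mmap path.

```c
/*
 * Hypothetical mmap-handler fragment: modify VMA flags only through the
 * nv_vm_flags_* wrappers, which fall back to direct assignment on kernels
 * where vm_flags is still directly writable.
 */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
    /* Mark the mapping as IO-backed, non-expandable, and excluded from core dumps. */
    nv_vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);

    /* ... establish the actual mapping here (e.g., remap_pfn_range) ... */

    return 0;
}
```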
@@ -347,6 +347,12 @@ typedef struct nv_soc_irq_info_s {
|
||||
/* DMA-capable device data, defined by kernel interface layer */
|
||||
typedef struct nv_dma_device nv_dma_device_t;
|
||||
|
||||
typedef struct nv_phys_addr_range
|
||||
{
|
||||
NvU64 addr;
|
||||
NvU64 len;
|
||||
} nv_phys_addr_range_t;
|
||||
|
||||
typedef struct nv_state_t
|
||||
{
|
||||
void *priv; /* private data */
|
||||
@@ -466,6 +472,9 @@ typedef struct nv_state_t
|
||||
/* Bool to check if ISO iommu enabled */
|
||||
NvBool iso_iommu_present;
|
||||
|
||||
/* Bool to check if NISO iommu enabled */
|
||||
NvBool niso_iommu_present;
|
||||
|
||||
/* Bool to check if dma-buf is supported */
|
||||
NvBool dma_buf_supported;
|
||||
|
||||
@@ -477,6 +486,8 @@ typedef struct nv_state_t
|
||||
/* Bool to check if the device received a shutdown notification */
|
||||
NvBool is_shutdown;
|
||||
|
||||
/* Bool to check if the GPU has a coherent sysmem link */
|
||||
NvBool coherent;
|
||||
} nv_state_t;
|
||||
|
||||
// These define need to be in sync with defines in system.h
|
||||
@@ -499,12 +510,20 @@ struct nv_file_private_t
|
||||
nv_file_private_t *ctl_nvfp;
|
||||
void *ctl_nvfp_priv;
|
||||
NvU32 register_or_refcount;
|
||||
|
||||
//
|
||||
// True if a client or an event was ever allocated on this fd.
|
||||
// If false, RMAPI cleanup is skipped.
|
||||
//
|
||||
NvBool bCleanupRmapi;
|
||||
};
|
||||
|
||||
// Forward define the gpu ops structures
|
||||
typedef struct gpuSession *nvgpuSessionHandle_t;
|
||||
typedef struct gpuDevice *nvgpuDeviceHandle_t;
|
||||
typedef struct gpuAddressSpace *nvgpuAddressSpaceHandle_t;
|
||||
typedef struct gpuTsg *nvgpuTsgHandle_t;
|
||||
typedef struct UvmGpuTsgAllocParams_tag nvgpuTsgAllocParams_t;
|
||||
typedef struct gpuChannel *nvgpuChannelHandle_t;
|
||||
typedef struct UvmGpuChannelInfo_tag *nvgpuChannelInfo_t;
|
||||
typedef struct UvmGpuChannelAllocParams_tag nvgpuChannelAllocParams_t;
|
||||
@@ -531,7 +550,7 @@ typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocPara
|
||||
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
|
||||
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
|
||||
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
|
||||
/*
|
||||
@@ -599,6 +618,8 @@ typedef enum
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iso_iommu_present)
|
||||
|
||||
#define NV_SOC_IS_NISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->niso_iommu_present)
|
||||
/*
|
||||
* GPU add/remove events
|
||||
*/
|
||||
@@ -813,6 +834,7 @@ nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
|
||||
void NV_API_CALL nv_put_file_private(void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
|
||||
@@ -920,6 +942,7 @@ NV_STATUS NV_API_CALL rm_write_registry_string (nvidia_stack_t *, nv_state_t *
|
||||
void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *);
|
||||
char* NV_API_CALL rm_remove_spaces (const char *);
|
||||
char* NV_API_CALL rm_string_token (char **, const char);
|
||||
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);
|
||||
@@ -942,6 +965,8 @@ NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU
|
||||
|
||||
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
|
||||
|
||||
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
|
||||
|
||||
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
|
||||
@@ -951,12 +976,12 @@ NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, N
|
||||
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU32, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
|
||||
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
|
||||
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
|
||||
@@ -994,7 +1019,7 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -327,7 +327,7 @@ NV_STATUS nvUvmInterfaceGetPmaObject(uvmGpuDeviceHandle device,
|
||||
|
||||
// Mirrors pmaEvictPagesCb_t, see its documentation in pma.h.
|
||||
typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
NvU64 *pPages,
|
||||
NvU32 count,
|
||||
NvU64 physBegin,
|
||||
@@ -390,7 +390,7 @@ void nvUvmInterfacePmaUnregisterEvictionCallbacks(void *pPma);
|
||||
*/
|
||||
NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
UvmPmaAllocationOptions *pPmaAllocOptions,
|
||||
NvU64 *pPages);
|
||||
|
||||
@@ -419,7 +419,7 @@ NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
|
||||
NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
NvU32 flags);
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -447,7 +447,7 @@ NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
|
||||
NV_STATUS nvUvmInterfacePmaUnpinPages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize);
|
||||
NvU64 pageSize);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceMemoryFree
|
||||
@@ -488,7 +488,7 @@ void nvUvmInterfaceMemoryFree(uvmGpuAddressSpaceHandle vaSpace,
|
||||
void nvUvmInterfacePmaFreePages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU32 pageSize,
|
||||
NvU64 pageSize,
|
||||
NvU32 flags);
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -507,7 +507,7 @@ void nvUvmInterfacePmaFreePages(void *pPma,
|
||||
NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
UvmGpuPointer gpuPointer,
|
||||
NvLength length, void **cpuPtr,
|
||||
NvU32 pageSize);
|
||||
NvU64 pageSize);
|
||||
|
||||
/*******************************************************************************
|
||||
uvmGpuMemoryCpuUnmap
|
||||
@@ -517,16 +517,59 @@ NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
|
||||
void *cpuPtr);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceTsgAllocate
|
||||
|
||||
This function allocates a Time-Slice Group (TSG).
|
||||
|
||||
allocParams must contain an engineIndex as TSGs need to be bound to an
|
||||
engine type at allocation time. The possible values are [0,
|
||||
UVM_COPY_ENGINE_COUNT_MAX) for CE engine type. Notably only the copy engines
|
||||
that have UvmGpuCopyEngineCaps::supported set to true can be allocated.
|
||||
|
||||
Note that TSG is not supported on all GPU architectures for all engine
|
||||
types, e.g., pre-Volta GPUs only support TSG for the GR/Compute engine type.
|
||||
On devices that do not support HW TSGs on the requested engine, this API is
|
||||
still required, i.e., a TSG handle is required in
|
||||
nvUvmInterfaceChannelAllocate(), due to information stored in it necessary
|
||||
for channel allocation. However, when HW TSGs aren't supported, a TSG handle
|
||||
is essentially a "fake" TSG with no HW scheduling impact.
|
||||
|
||||
tsg is filled with the address of the corresponding TSG handle.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - VA space linked to a client and a device under which
|
||||
the TSG is allocated.
|
||||
allocParams[IN] - structure with allocation settings.
|
||||
tsg[OUT] - pointer to the new TSG handle.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
NV_ERR_NO_MEMORY
|
||||
NV_ERR_NOT_SUPPORTED
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceTsgAllocate(uvmGpuAddressSpaceHandle vaSpace,
|
||||
const UvmGpuTsgAllocParams *allocParams,
|
||||
uvmGpuTsgHandle *tsg);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceTsgDestroy
|
||||
|
||||
This function destroys a given TSG.
|
||||
|
||||
Arguments:
|
||||
tsg[IN] - Tsg handle
|
||||
*/
|
||||
void nvUvmInterfaceTsgDestroy(uvmGpuTsgHandle tsg);
|
||||
|
||||
/*******************************************************************************
    nvUvmInterfaceChannelAllocate

    This function will allocate a channel bound to a copy engine
    This function will allocate a channel bound to a copy engine (CE) or a SEC2
    engine.

    allocParams must contain an engineIndex as channels need to be bound to an
    engine type at allocation time. The possible values are [0,
    UVM_COPY_ENGINE_COUNT_MAX), but notably only the copy engines that have
    UvmGpuCopyEngineCaps::supported set to true can be allocated. This struct
    also contains information relative to GPFIFO and GPPut.
    allocParams contains information relative to GPFIFO and GPPut.

    channel is filled with the address of the corresponding channel handle.

@@ -536,17 +579,18 @@ void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,

    Host channel submission doorbell.

    Arguments:
        vaSpace[IN]      - VA space linked to a client and a device under which
                           the channel will be allocated
        tsg[IN]          - Time-Slice Group that the channel will be a member of.
        allocParams[IN]  - structure with allocation settings
        channel[OUT]     - pointer to the new channel handle
        channelInfo[OUT] - structure filled with channel information

    Error codes:
        NV_ERR_GENERIC
        NV_ERR_INVALID_ARGUMENT
        NV_ERR_NO_MEMORY
        NV_ERR_NOT_SUPPORTED
*/
NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
NV_STATUS nvUvmInterfaceChannelAllocate(const uvmGpuTsgHandle tsg,
                                        const UvmGpuChannelAllocParams *allocParams,
                                        uvmGpuChannelHandle *channel,
                                        UvmGpuChannelInfo *channelInfo);
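The intended call order for the TSG and channel entry points above (TSG first, then a channel inside it, per the updated nvUvmInterfaceChannelAllocate() signature) can be sketched as follows. This is an illustration, not UVM driver code: error handling is minimal, NV_OK is assumed as the success status, and only the engineIndex field of the allocation parameters is populated (real callers fill in the full structures).

```c
/*
 * Illustrative call-order sketch only: allocate a TSG on a copy engine, then
 * allocate a channel that is a member of that TSG. The engineIndex value and
 * the zero-initialized parameter structs are assumptions for brevity.
 */
static NV_STATUS example_alloc_tsg_and_channel(uvmGpuAddressSpaceHandle vaSpace,
                                               uvmGpuTsgHandle *tsg,
                                               uvmGpuChannelHandle *channel,
                                               UvmGpuChannelInfo *channelInfo)
{
    UvmGpuTsgAllocParams tsgParams = { 0 };
    UvmGpuChannelAllocParams channelParams = { 0 };
    NV_STATUS status;

    tsgParams.engineIndex = 0;  /* assumed: first supported copy engine */

    status = nvUvmInterfaceTsgAllocate(vaSpace, &tsgParams, tsg);
    if (status != NV_OK)
        return status;

    status = nvUvmInterfaceChannelAllocate(*tsg, &channelParams, channel, channelInfo);
    if (status != NV_OK)
        nvUvmInterfaceTsgDestroy(*tsg);  /* clean up the TSG on failure */

    return status;
}
```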
@@ -554,7 +598,7 @@ NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceChannelDestroy
|
||||
|
||||
This function destroys a given channel
|
||||
This function destroys a given channel.
|
||||
|
||||
Arguments:
|
||||
channel[IN] - channel handle
|
||||
@@ -575,7 +619,7 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
|
||||
NV_ERR_NO_MEMORY
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
|
||||
UvmGpuCaps * caps);
|
||||
UvmGpuCaps *caps);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceQueryCopyEnginesCaps
|
||||
@@ -946,13 +990,15 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
pAccessCntrInfo[OUT] - Information provided by RM for access counter handling
|
||||
accessCntrIndex[IN] - Access counter index
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceInitAccessCntrInfo(uvmGpuDeviceHandle device,
|
||||
UvmGpuAccessCntrInfo *pAccessCntrInfo);
|
||||
UvmGpuAccessCntrInfo *pAccessCntrInfo,
|
||||
NvU32 accessCntrIndex);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceDestroyAccessCntrInfo
|
||||
@@ -1402,4 +1448,243 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
/*******************************************************************************
|
||||
CSL Interface and Locking
|
||||
|
||||
The following functions do not acquire the RM API or GPU locks and must not be called
|
||||
concurrently with the same UvmCslContext parameter in different threads. The caller must
|
||||
guarantee this exclusion.
|
||||
|
||||
* nvUvmInterfaceCslRotateIv
|
||||
* nvUvmInterfaceCslEncrypt
|
||||
* nvUvmInterfaceCslDecrypt
|
||||
* nvUvmInterfaceCslSign
|
||||
* nvUvmInterfaceCslQueryMessagePool
|
||||
* nvUvmInterfaceCslIncrementIv
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslInitContext
|
||||
|
||||
Allocates and initializes a CSL context for a given secure channel.
|
||||
|
||||
The lifetime of the context is the same as the lifetime of the secure channel
|
||||
it is paired with.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
channel[IN] - Handle to a secure channel.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_STATE - The system is not operating in Confidential Compute mode.
|
||||
NV_ERR_INVALID_CHANNEL - The associated channel is not a secure channel.
|
||||
NV_ERR_IN_USE - The context has already been initialized.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
|
||||
uvmGpuChannelHandle channel);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceDeinitCslContext
|
||||
|
||||
Securely deinitializes and clears the contents of a context.
|
||||
|
||||
If context is already deinitialized then function returns immediately.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN] - The CSL context.
|
||||
*/
|
||||
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslRotateIv
|
||||
|
||||
Rotates the IV for a given channel and operation.
|
||||
|
||||
This function will rotate the IV on both the CPU and the GPU.
|
||||
Outstanding messages that have been encrypted by the GPU should first be
|
||||
decrypted before calling this function with operation equal to
|
||||
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
|
||||
encrypted by the CPU should first be decrypted before calling this function
|
||||
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
|
||||
the channel must be idle before calling this function. This function can be
|
||||
called regardless of the value of the IV's message counter.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
operation[IN] - Either
|
||||
- UVM_CSL_OPERATION_ENCRYPT
|
||||
- UVM_CSL_OPERATION_DECRYPT
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
|
||||
to overflow.
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
|
||||
UvmCslOperation operation);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslEncrypt
|
||||
|
||||
Encrypts data and produces an authentication tag.
|
||||
|
||||
Auth, input, and output buffers must not overlap. If they do then calling
|
||||
this function produces undefined behavior. Performance is typically
|
||||
maximized when the input and output buffers are 16-byte aligned. This is
|
||||
natural alignment for AES block.
|
||||
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
|
||||
However, it is optional. If it is NULL, the next IV in line will be used.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[IN] - Size of the input and output buffers in
|
||||
units of bytes. Value can range from 1 byte
|
||||
to (2^32) - 1 bytes.
|
||||
inputBuffer[IN] - Address of plaintext input buffer.
|
||||
encryptIv[IN/OUT] - IV to use for encryption. Can be NULL.
|
||||
outputBuffer[OUT] - Address of ciphertext output buffer.
|
||||
authTagBuffer[OUT] - Address of authentication tag buffer.
|
||||
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
- The encryptIv has already been used.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
NvU8 const *inputBuffer,
|
||||
UvmCslIv *encryptIv,
|
||||
NvU8 *outputBuffer,
|
||||
NvU8 *authTagBuffer);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslDecrypt
|
||||
|
||||
Verifies the authentication tag and decrypts data.
|
||||
|
||||
Auth, input, and output buffers must not overlap. If they do then calling
|
||||
this function produces undefined behavior. Performance is typically
|
||||
maximized when the input and output buffers are 16-byte aligned. This is
|
||||
natural alignment for AES block.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[IN] - Size of the input and output buffers in units of bytes.
|
||||
Value can range from 1 byte to (2^32) - 1 bytes.
|
||||
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
|
||||
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
|
||||
internal counter is used.
|
||||
inputBuffer[IN] - Address of ciphertext input buffer.
|
||||
outputBuffer[OUT] - Address of plaintext output buffer.
|
||||
addAuthData[IN] - Address of the plaintext additional authenticated data used to
|
||||
calculate the authentication tag. Can be NULL.
|
||||
addAuthDataSize[IN] - Size of the additional authenticated data in units of bytes.
|
||||
Value can range from 1 byte to (2^32) - 1 bytes.
|
||||
This parameter is ignored if addAuthData is NULL.
|
||||
authTagBuffer[IN] - Address of authentication tag buffer.
|
||||
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The decryption operation would cause a
|
||||
counter overflow to occur.
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
NV_ERR_INVALID_DATA - Verification of the authentication tag fails.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
NvU8 const *inputBuffer,
|
||||
UvmCslIv const *decryptIv,
|
||||
NvU8 *outputBuffer,
|
||||
NvU8 const *addAuthData,
|
||||
NvU32 addAuthDataSize,
|
||||
NvU8 const *authTagBuffer);
|
||||
|
||||
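/*
 * Hypothetical usage sketch (an assumption, not part of this header): decrypt
 * a payload with no additional authenticated data, letting the CSL context's
 * internal counter supply the IV. NV_ERR_INVALID_DATA from the call indicates
 * that authentication-tag verification failed.
 */
static NV_STATUS exampleDecryptPayload(UvmCslContext *uvmCslContext,
                                       NvU8 const *ciphertext,
                                       NvU32 size,
                                       NvU8 *plaintext,
                                       NvU8 const *authTag)
{
    return nvUvmInterfaceCslDecrypt(uvmCslContext, size, ciphertext,
                                    NULL,      // decryptIv: use internal counter
                                    plaintext,
                                    NULL,      // addAuthData: none
                                    0,         // addAuthDataSize: ignored
                                    authTag);
}
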
/*******************************************************************************
    nvUvmInterfaceCslSign

    Generates an authentication tag for secure work launch.

    Auth and input buffers must not overlap. If they do then calling this function produces
    undefined behavior.

    See "CSL Interface and Locking" for locking requirements.
    This function does not perform dynamic memory allocation.

    Arguments:
        uvmCslContext[IN/OUT] - The CSL context.
        bufferSize[IN]        - Size of the input buffer in units of bytes.
                                Value can range from 1 byte to (2^32) - 1 bytes.
        inputBuffer[IN]       - Address of plaintext input buffer.
        authTagBuffer[OUT]    - Address of authentication tag buffer.
                                Its size is UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES.

    Error codes:
        NV_ERR_INSUFFICIENT_RESOURCES - The signing operation would cause a counter overflow to occur.
        NV_ERR_INVALID_ARGUMENT       - The size of the data is 0 bytes.
*/
NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
                                NvU32 bufferSize,
                                NvU8 const *inputBuffer,
                                NvU8 *authTagBuffer);

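/*
 * Hypothetical usage sketch (an assumption, not part of this header): sign a
 * work-launch blob. The caller supplies an authTag buffer of
 * UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES bytes (defined in nv_uvm_types.h).
 */
static NV_STATUS exampleSignLaunchBlob(UvmCslContext *uvmCslContext,
                                       NvU8 const *blob,
                                       NvU32 blobSize,
                                       NvU8 *authTag)
{
    return nvUvmInterfaceCslSign(uvmCslContext, blobSize, blob, authTag);
}
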
/*******************************************************************************
    nvUvmInterfaceCslQueryMessagePool

    Returns the number of messages that can be encrypted before the message counter will overflow.

    See "CSL Interface and Locking" for locking requirements.
    This function does not perform dynamic memory allocation.

    Arguments:
        uvmCslContext[IN/OUT] - The CSL context.
        operation[IN]         - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
        messageNum[OUT]       - Number of messages left before overflow.

    Error codes:
        NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
*/
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
                                            UvmCslOperation operation,
                                            NvU64 *messageNum);

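/*
 * Hypothetical usage sketch (an assumption, not part of this header): report
 * whether the decrypt-message budget is running low. The threshold is an
 * arbitrary illustration.
 */
static NvBool exampleDecryptPoolIsLow(UvmCslContext *uvmCslContext)
{
    NvU64 messagesLeft = 0;

    if (nvUvmInterfaceCslQueryMessagePool(uvmCslContext,
                                          UVM_CSL_OPERATION_DECRYPT,
                                          &messagesLeft) != NV_OK)
        return NV_FALSE;

    return messagesLeft < 64;
}
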
/*******************************************************************************
    nvUvmInterfaceCslIncrementIv

    Increments the message counter by the specified amount.

    If iv is non-NULL then the incremented value is returned.
    If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
    can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
    the returned IV can be used in nvUvmInterfaceCslDecrypt.

    See "CSL Interface and Locking" for locking requirements.
    This function does not perform dynamic memory allocation.

    Arguments:
        uvmCslContext[IN/OUT] - The CSL context.
        operation[IN]         - Either
                                - UVM_CSL_OPERATION_ENCRYPT
                                - UVM_CSL_OPERATION_DECRYPT
        increment[IN]         - The amount by which the IV is incremented. Can be 0.
        iv[OUT]               - If non-NULL, a buffer to store the incremented IV.

    Error codes:
        NV_ERR_INVALID_ARGUMENT       - The value of the operation parameter is illegal.
        NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
                                        in an overflow.
*/
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
                                       UvmCslOperation operation,
                                       NvU64 increment,
                                       UvmCslIv *iv);

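/*
 * Hypothetical usage sketch (an assumption, not part of this header): reserve
 * one fresh encrypt IV via nvUvmInterfaceCslIncrementIv and pass it explicitly
 * to nvUvmInterfaceCslEncrypt, as described above.
 */
static NV_STATUS exampleEncryptWithFreshIv(UvmCslContext *uvmCslContext,
                                           NvU8 const *plaintext,
                                           NvU32 size,
                                           NvU8 *ciphertext,
                                           NvU8 *authTag)
{
    UvmCslIv iv;
    NV_STATUS status;

    // The returned IV has its "freshness" bit set and may be used once.
    status = nvUvmInterfaceCslIncrementIv(uvmCslContext,
                                          UVM_CSL_OPERATION_ENCRYPT, 1, &iv);
    if (status != NV_OK)
        return status;

    return nvUvmInterfaceCslEncrypt(uvmCslContext, size, plaintext, &iv,
                                    ciphertext, authTag);
}
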
#endif // _NV_UVM_INTERFACE_H_
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -92,6 +92,7 @@ typedef unsigned long long UvmGpuPointer;
|
||||
typedef struct uvmGpuSession_tag *uvmGpuSessionHandle; // gpuSessionHandle
|
||||
typedef struct uvmGpuDevice_tag *uvmGpuDeviceHandle; // gpuDeviceHandle
|
||||
typedef struct uvmGpuAddressSpace_tag *uvmGpuAddressSpaceHandle; // gpuAddressSpaceHandle
|
||||
typedef struct uvmGpuTsg_tag *uvmGpuTsgHandle; // gpuTsgHandle
|
||||
typedef struct uvmGpuChannel_tag *uvmGpuChannelHandle; // gpuChannelHandle
|
||||
typedef struct uvmGpuCopyEngine_tag *uvmGpuCopyEngineHandle; // gpuObjectHandle
|
||||
|
||||
@@ -280,6 +281,16 @@ typedef struct UvmGpuChannelInfo_tag
|
||||
// to kick off the new work.
|
||||
//
|
||||
volatile NvU32 *pWorkSubmissionToken;
|
||||
|
||||
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
|
||||
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
|
||||
NvU64 gpFifoGpuVa;
|
||||
NvU64 gpPutGpuVa;
|
||||
NvU64 gpGetGpuVa;
|
||||
// GPU VA of work submission offset is needed in Confidential Computing
|
||||
// so CE channels can ring doorbell of other channels as required for
|
||||
// WLC/LCIC work submission
|
||||
NvU64 workSubmissionOffsetGpuVa;
|
||||
} UvmGpuChannelInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -292,6 +303,17 @@ typedef enum
|
||||
UVM_BUFFER_LOCATION_VID = 2,
|
||||
} UVM_BUFFER_LOCATION;
|
||||
|
||||
typedef struct UvmGpuTsgAllocParams_tag
|
||||
{
|
||||
// Interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
|
||||
// Index of the engine the TSG is bound to.
|
||||
// Ignored if engineType is anything other than
|
||||
// UVM_GPU_CHANNEL_ENGINE_TYPE_CE.
|
||||
NvU32 engineIndex;
|
||||
} UvmGpuTsgAllocParams;
|
||||
|
||||
typedef struct UvmGpuChannelAllocParams_tag
|
||||
{
|
||||
NvU32 numGpFifoEntries;
|
||||
@@ -299,14 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
|
||||
// The next two fields store UVM_BUFFER_LOCATION values
|
||||
NvU32 gpFifoLoc;
|
||||
NvU32 gpPutLoc;
|
||||
|
||||
// Index of the engine the channel will be bound to
|
||||
// ignored if engineType is anything other than UVM_GPU_CHANNEL_ENGINE_TYPE_CE
|
||||
NvU32 engineIndex;
|
||||
|
||||
// interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
|
||||
} UvmGpuChannelAllocParams;
|
||||
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag
|
||||
@@ -372,39 +386,16 @@ typedef enum
|
||||
UVM_LINK_TYPE_NVLINK_2,
|
||||
UVM_LINK_TYPE_NVLINK_3,
|
||||
UVM_LINK_TYPE_NVLINK_4,
|
||||
UVM_LINK_TYPE_C2C,
|
||||
} UVM_LINK_TYPE;
|
||||
|
||||
typedef struct UvmGpuCaps_tag
|
||||
{
|
||||
NvU32 sysmemLink; // UVM_LINK_TYPE
|
||||
NvU32 sysmemLinkRateMBps; // See UvmGpuP2PCapsParams::totalLinkLineRateMBps
|
||||
// If numaEnabled is NV_TRUE, then the system address of allocated GPU
|
||||
// memory can be converted to struct pages. See
|
||||
// UvmGpuInfo::systemMemoryWindowStart.
|
||||
NvBool numaEnabled;
|
||||
NvU32 numaNodeId;
|
||||
|
||||
// On ATS systems, GPUs connected to different CPU sockets can have peer
|
||||
// traffic. They are called indirect peers. However, indirect peers are
|
||||
// mapped using sysmem aperture. In order to disambiguate the location of a
|
||||
// specific memory address, each GPU maps its memory to a different window
|
||||
// in the System Physical Address (SPA) space. The following fields contain
|
||||
// the base + size of such window for the GPU. systemMemoryWindowSize
|
||||
// different than 0 indicates that the window is valid.
|
||||
//
|
||||
// - If the window is valid, then we can map GPU memory to the CPU as
|
||||
// cache-coherent by adding the GPU address to the window start.
|
||||
// - If numaEnabled is NV_TRUE, then we can also convert the system
|
||||
// addresses of allocated GPU memory to struct pages.
|
||||
//
|
||||
// TODO: Bug 1986868: fix window start computation for SIMICS
|
||||
NvU64 systemMemoryWindowStart;
|
||||
NvU64 systemMemoryWindowSize;
|
||||
|
||||
// This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
|
||||
// all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
|
||||
// nvswitchMemoryWindowStart tells the base address for the GPU in the
|
||||
// NVSwitch address space. It is used when creating PTEs of memory mappings
|
||||
// to NVSwitch peers.
|
||||
NvBool connectedToSwitch;
|
||||
NvU64 nvswitchMemoryWindowStart;
|
||||
} UvmGpuCaps;
|
||||
|
||||
typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
@@ -436,6 +427,8 @@ typedef struct UvmGpuAllocInfo_tag
|
||||
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
|
||||
NvBool bPersistentVidmem; // Causes RM to allocate persistent video memory
|
||||
NvHandle hPhysHandle; // Handle for phys allocation either provided or retrieved
|
||||
NvBool bUnprotected; // Allocation to be made in unprotected memory whenever
|
||||
// SEV or GPU CC modes are enabled. Ignored otherwise
|
||||
} UvmGpuAllocInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -573,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
|
||||
// Out: ATS (Address Translation Services) is supported
|
||||
NvBool atsSupported;
|
||||
|
||||
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
|
||||
NvBool sevEnabled;
|
||||
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
|
||||
// is enabled in the VM, indicating that Confidential Computing must be
|
||||
// also enabled in the GPU(s); these two security features are either both
|
||||
// enabled, or both disabled.
|
||||
NvBool confComputingEnabled;
|
||||
} UvmPlatformInfo;
|
||||
|
||||
typedef struct UvmGpuClientInfo_tag
|
||||
@@ -584,6 +580,20 @@ typedef struct UvmGpuClientInfo_tag
|
||||
NvHandle hSmcPartRef;
|
||||
} UvmGpuClientInfo;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_GPU_CONF_COMPUTE_MODE_NONE,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_APM,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_HCC,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_COUNT
|
||||
} UvmGpuConfComputeMode;
|
||||
|
||||
typedef struct UvmGpuConfComputeCaps_tag
|
||||
{
|
||||
// Out: GPU's confidential compute mode
|
||||
UvmGpuConfComputeMode mode;
|
||||
} UvmGpuConfComputeCaps;
|
||||
|
||||
#define UVM_GPU_NAME_LENGTH 0x40
|
||||
|
||||
typedef struct UvmGpuInfo_tag
|
||||
@@ -648,6 +658,31 @@ typedef struct UvmGpuInfo_tag
|
||||
|
||||
UvmGpuClientInfo smcUserClientInfo;
|
||||
|
||||
// Confidential Compute capabilities of this GPU
|
||||
UvmGpuConfComputeCaps gpuConfComputeCaps;
|
||||
|
||||
// UVM_LINK_TYPE
|
||||
NvU32 sysmemLink;
|
||||
|
||||
// See UvmGpuP2PCapsParams::totalLinkLineRateMBps
|
||||
NvU32 sysmemLinkRateMBps;
|
||||
|
||||
// On coherent systems each GPU maps its memory to a window in the System
|
||||
// Physical Address (SPA) space. The following fields describe that window.
|
||||
//
|
||||
// systemMemoryWindowSize > 0 indicates that the window is valid, meaning
|
||||
// that GPU memory can be mapped by the CPU as cache-coherent by adding the
|
||||
// GPU address to the window start.
|
||||
NvU64 systemMemoryWindowStart;
|
||||
NvU64 systemMemoryWindowSize;
|
||||
|
||||
// This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
|
||||
// all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
|
||||
// nvswitchMemoryWindowStart tells the base address for the GPU in the
|
||||
// NVSwitch address space. It is used when creating PTEs of memory mappings
|
||||
// to NVSwitch peers.
|
||||
NvBool connectedToSwitch;
|
||||
NvU64 nvswitchMemoryWindowStart;
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@@ -690,6 +725,9 @@ typedef struct UvmPmaStatistics_tag
|
||||
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
|
||||
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
|
||||
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
|
||||
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
|
||||
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
|
||||
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
|
||||
} UvmPmaStatistics;
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -797,24 +835,92 @@ struct UvmOpsUvmEvents
|
||||
#endif
|
||||
};
|
||||
|
||||
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
|
||||
#define UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES 16
|
||||
|
||||
typedef union UvmFaultMetadataPacket_tag
|
||||
{
|
||||
struct {
|
||||
NvU8 authTag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES];
|
||||
NvBool valid;
|
||||
};
|
||||
// padding to 32Bytes
|
||||
NvU8 _padding[32];
|
||||
} UvmFaultMetadataPacket;
|
||||
|
||||
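/*
 * Illustrative compile-time check (an assumption, not part of this header):
 * the union above is padded so that each metadata packet occupies exactly
 * 32 bytes. The EXAMPLE_STATIC_ASSERT macro is hypothetical.
 */
#define EXAMPLE_STATIC_ASSERT(expr, name) typedef char name[(expr) ? 1 : -1]
EXAMPLE_STATIC_ASSERT(sizeof(UvmFaultMetadataPacket) == 32,
                      example_fault_metadata_packet_is_32_bytes);
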
// This struct shall not be accessed nor modified directly by UVM as it is
|
||||
// entirely managed by the RM layer
|
||||
typedef struct UvmCslContext_tag
|
||||
{
|
||||
struct ccslContext_t *ctx;
|
||||
void *nvidia_stack;
|
||||
} UvmCslContext;
|
||||
|
||||
typedef struct UvmGpuFaultInfo_tag
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Register mappings obtained from RM
|
||||
// Fault buffer GET register mapping.
|
||||
//
|
||||
// When Confidential Computing is enabled, GET refers to the shadow
|
||||
// buffer (see bufferAddress below), and not to the actual HW buffer.
|
||||
// In this setup, writes of GET (by UVM) do not result in re-evaluation
|
||||
// of any interrupt condition.
|
||||
volatile NvU32* pFaultBufferGet;
|
||||
|
||||
// Fault buffer PUT register mapping.
|
||||
//
|
||||
// When Confidential Computing is enabled, PUT refers to the shadow
|
||||
// buffer (see bufferAddress below), and not to the actual HW buffer.
|
||||
// In this setup, writes of PUT (by GSP-RM) do not result in
|
||||
// re-evaluation of any interrupt condition.
|
||||
volatile NvU32* pFaultBufferPut;
|
||||
// Note: this variable is deprecated since buffer overflow is not a separate
|
||||
// register from future chips.
|
||||
|
||||
// Note: this variable is deprecated since buffer overflow is not a
|
||||
// separate register from future chips.
|
||||
volatile NvU32* pFaultBufferInfo;
|
||||
|
||||
// Register mapping used to clear a replayable fault interrupt in
|
||||
// Turing+ GPUs.
|
||||
volatile NvU32* pPmcIntr;
|
||||
|
||||
// Register mapping used to enable replayable fault interrupts.
|
||||
volatile NvU32* pPmcIntrEnSet;
|
||||
|
||||
// Register mapping used to disable replayable fault interrupts.
|
||||
volatile NvU32* pPmcIntrEnClear;
|
||||
|
||||
// Register used to enable, or disable, faults on prefetches.
|
||||
volatile NvU32* pPrefetchCtrl;
|
||||
|
||||
// Replayable fault interrupt mask identifier.
|
||||
NvU32 replayableFaultMask;
|
||||
// fault buffer cpu mapping and size
|
||||
void* bufferAddress;
|
||||
|
||||
// Fault buffer CPU mapping
|
||||
void* bufferAddress;
|
||||
//
|
||||
// When Confidential Computing is disabled, the mapping points to the
|
||||
// actual HW fault buffer.
|
||||
//
|
||||
// When Confidential Computing is enabled, the mapping points to a
|
||||
// copy of the HW fault buffer. This "shadow buffer" is maintained
|
||||
// by GSP-RM.
|
||||
|
||||
// Size, in bytes, of the fault buffer pointed by bufferAddress.
|
||||
NvU32 bufferSize;
|
||||
// Mapping pointing to the start of the fault buffer metadata containing
|
||||
// a 16Byte authentication tag and a valid byte. Always NULL when
|
||||
// Confidential Computing is disabled.
|
||||
UvmFaultMetadataPacket *bufferMetadata;
|
||||
|
||||
// CSL context used for performing decryption of replayable faults when
|
||||
// Confidential Computing is enabled.
|
||||
UvmCslContext cslCtx;
|
||||
|
||||
// Indicates whether UVM owns the replayable fault buffer.
|
||||
// The value of this field is always NV_TRUE when Confidential Computing
|
||||
// is disabled.
|
||||
NvBool bUvmOwnsHwFaultBuffer;
|
||||
} replayable;
|
||||
struct
|
||||
{
|
||||
@@ -834,8 +940,19 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
// Preallocated stack for functions called from the UVM isr bottom half
|
||||
void *isr_bh_sp;
|
||||
|
||||
// Used only when Hopper Confidential Compute is enabled
|
||||
// Register mappings obtained from RM
|
||||
volatile NvU32* pFaultBufferPut;
|
||||
|
||||
// Used only when Hopper Confidential Compute is enabled
|
||||
// Cached get index of the non-replayable shadow buffer
|
||||
NvU32 shadowBufferGet;
|
||||
|
||||
// See replayable.bufferMetadata
|
||||
UvmFaultMetadataPacket *shadowBufferMetadata;
|
||||
} nonReplayable;
|
||||
NvHandle faultBufferHandle;
|
||||
struct Device *pDevice;
|
||||
} UvmGpuFaultInfo;
|
||||
|
||||
struct Device;
|
||||
@@ -871,12 +988,6 @@ typedef struct UvmGpuAccessCntrInfo_tag
|
||||
void* bufferAddress;
|
||||
NvU32 bufferSize;
|
||||
NvHandle accessCntrBufferHandle;
|
||||
|
||||
// The Notification address in the access counter notification msg does not
|
||||
// contain the correct upper bits 63-47 for GPA-based notifications. RM
|
||||
// provides us with the correct offset to be added.
|
||||
// See Bug 1803015
|
||||
NvU64 baseDmaSysmemAddr;
|
||||
} UvmGpuAccessCntrInfo;
|
||||
|
||||
typedef enum
|
||||
@@ -919,6 +1030,7 @@ typedef enum UvmPmaGpuMemoryType_tag
|
||||
} UVM_PMA_GPU_MEMORY_TYPE;
|
||||
|
||||
typedef UvmGpuChannelInfo gpuChannelInfo;
|
||||
typedef UvmGpuTsgAllocParams gpuTsgAllocParams;
|
||||
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
|
||||
typedef UvmGpuCaps gpuCaps;
|
||||
typedef UvmGpuCopyEngineCaps gpuCeCaps;
|
||||
@@ -943,4 +1055,16 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
|
||||
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
|
||||
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
|
||||
|
||||
typedef struct UvmCslIv
|
||||
{
|
||||
NvU8 iv[12];
|
||||
NvU8 fresh;
|
||||
} UvmCslIv;
|
||||
|
||||
typedef enum UvmCslOperation
|
||||
{
|
||||
UVM_CSL_OPERATION_ENCRYPT,
|
||||
UVM_CSL_OPERATION_DECRYPT
|
||||
} UvmCslOperation;
|
||||
|
||||
#endif // _NV_UVM_TYPES_H_
|
||||
|
||||
@@ -165,8 +165,6 @@ struct NvKmsKapiConnectorInfo {
|
||||
|
||||
NvU32 physicalIndex;
|
||||
|
||||
NvU32 headMask;
|
||||
|
||||
NvKmsConnectorSignalFormat signalFormat;
|
||||
NvKmsConnectorType type;
|
||||
|
||||
@@ -194,6 +192,7 @@ struct NvKmsKapiStaticDisplayInfo {
|
||||
NvU32 numPossibleClones;
|
||||
NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
NvU32 headMask;
|
||||
};
|
||||
|
||||
struct NvKmsKapiSyncpt {
|
||||
@@ -1074,6 +1073,21 @@ struct NvKmsKapiFunctionsTable {
|
||||
NvU64 *pPages
|
||||
);
|
||||
|
||||
/*!
|
||||
* Check if this memory object can be scanned out for display.
|
||||
*
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] memory The memory object to check for display support.
|
||||
*
|
||||
* \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
|
||||
*/
|
||||
NvBool (*isMemoryValidForDisplay)
|
||||
(
|
||||
const struct NvKmsKapiDevice *device,
|
||||
const struct NvKmsKapiMemory *memory
|
||||
);
|
||||
|
||||
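/*
 * Hypothetical caller sketch (an assumption, not part of this header): reject
 * memory that cannot be scanned out before building a framebuffer from it.
 * The helper name and the -EINVAL convention (from <linux/errno.h>) are
 * assumptions; the function pointer is the one documented above.
 */
static int exampleCheckScanoutMemory(const struct NvKmsKapiFunctionsTable *nvKms,
                                     const struct NvKmsKapiDevice *device,
                                     const struct NvKmsKapiMemory *memory)
{
    if (!nvKms->isMemoryValidForDisplay(device, memory)) {
        return -EINVAL; /* not displayable */
    }
    return 0;
}
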
/*
|
||||
* Import SGT as a memory handle.
|
||||
*
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
//
|
||||
// This file was generated with FINN, an NVIDIA coding tool.
|
||||
// Source file: nvlimits.finn
|
||||
// Source file: nvlimits.finn
|
||||
//
|
||||
|
||||
|
||||
|
||||
@@ -149,6 +149,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Train
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
|
||||
@@ -513,6 +513,12 @@ typedef struct
|
||||
// place to re-locate these from nvos.h which cannot be included by a number
|
||||
// of builds that need them
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
|
||||
#define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
|
||||
#else
|
||||
#define NV_ATTRIBUTE_UNUSED
|
||||
#endif
|
||||
|
||||
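/*
 * Minimal usage sketch (an assumption, not from this header): suppress
 * unused-variable warnings on compilers that honor the attribute; on others
 * the macro expands to nothing.
 */
static int example_debug_counter NV_ATTRIBUTE_UNUSED;
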
#if defined(_MSC_VER)
|
||||
|
||||
#if _MSC_VER >= 1310
|
||||
@@ -536,8 +542,6 @@ typedef struct
|
||||
|
||||
#define NV_FORCERESULTCHECK
|
||||
|
||||
#define NV_ATTRIBUTE_UNUSED
|
||||
|
||||
#define NV_FORMAT_PRINTF(_f, _a)
|
||||
|
||||
#else // ! defined(_MSC_VER)
|
||||
@@ -635,12 +639,6 @@ typedef struct
|
||||
#define NV_FORCERESULTCHECK
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
|
||||
#define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
|
||||
#else
|
||||
#define NV_ATTRIBUTE_UNUSED
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Functions decorated with NV_FORMAT_PRINTF(f, a) have a format string at
|
||||
* parameter number 'f' and variadic arguments start at parameter number 'a'.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -188,7 +188,7 @@ NV_STATUS NV_API_CALL os_read_file (void *, NvU8 *, NvU64, NvU
|
||||
NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void **);
|
||||
NV_STATUS NV_API_CALL os_open_and_read_file (const char *, NvU8 *, NvU64);
|
||||
NvBool NV_API_CALL os_is_nvswitch_present (void);
|
||||
void NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL os_alloc_wait_queue (os_wait_queue **);
|
||||
void NV_API_CALL os_free_wait_queue (os_wait_queue *);
|
||||
void NV_API_CALL os_wait_uninterruptible (os_wait_queue *);
|
||||
@@ -207,12 +207,15 @@ enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
extern NvU8 os_page_shift;
|
||||
extern NvU32 os_sev_status;
|
||||
extern NvBool os_sev_enabled;
|
||||
extern NvBool os_cc_enabled;
|
||||
extern NvBool os_cc_tdx_enabled;
|
||||
extern NvBool os_dma_buf_enabled;
|
||||
|
||||
/*
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -56,7 +56,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_get_p2p_caps(nvidia_stack_t *, nvgpuDeviceHan
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_cpu_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, NvLength, void **, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_cpu_ummap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, void*);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_channel_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_tsg_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuTsgAllocParams_t *, nvgpuTsgHandle_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_tsg_destroy(nvidia_stack_t *, nvgpuTsgHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_channel_allocate(nvidia_stack_t *, const nvgpuTsgHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_channel_destroy(nvidia_stack_t *, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_free(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuCaps_t);
|
||||
@@ -76,7 +78,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
|
||||
@@ -99,4 +101,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
|
||||
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
@@ -44,6 +44,8 @@
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#include <linux/nvhost.h>
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
#include <linux/host1x-next.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
@@ -361,6 +363,21 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
req_config->config.syncptParams.postSyncptRequested = true;
|
||||
}
|
||||
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
int ret = host1x_fence_extract(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
req_config->config.syncptParams.preSyncptSpecified = true;
|
||||
}
|
||||
|
||||
if (nv_drm_plane_state->fd_user_ptr) {
|
||||
req_config->config.syncptParams.postSyncptRequested = true;
|
||||
}
|
||||
#else
|
||||
return -1;
|
||||
|
||||
@@ -1367,8 +1367,23 @@ static struct drm_driver nv_drm_driver = {
|
||||
.ioctls = nv_drm_ioctls,
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
|
||||
* conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
*
|
||||
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made these helpers the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
|
||||
* them if the helpers aren't present.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
#endif
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_fd_to_handle
|
||||
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
|
||||
#endif
|
||||
|
||||
.gem_prime_import = nv_drm_gem_prime_import,
|
||||
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
|
||||
|
||||
@@ -1436,7 +1451,9 @@ static void nv_drm_update_drm_driver_features(void)
|
||||
|
||||
nv_drm_driver.dumb_create = nv_drm_dumb_create;
|
||||
nv_drm_driver.dumb_map_offset = nv_drm_dumb_map_offset;
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
nv_drm_driver.dumb_destroy = nv_drm_dumb_destroy;
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
}
|
||||
|
||||
|
||||
@@ -205,7 +205,7 @@ nv_drm_add_encoder(struct drm_device *dev, NvKmsKapiDisplay hDisplay)
|
||||
encoder = nv_drm_encoder_new(dev,
|
||||
displayInfo->handle,
|
||||
connectorInfo->signalFormat,
|
||||
get_crtc_mask(dev, connectorInfo->headMask));
|
||||
get_crtc_mask(dev, displayInfo->headMask));
|
||||
|
||||
if (IS_ERR(encoder)) {
|
||||
ret = PTR_ERR(encoder);
|
||||
|
||||
@@ -150,6 +150,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
|
||||
if (nv_fb->nv_gem[i] != NULL) {
|
||||
if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
|
||||
nv_fb->nv_gem[i]->pMemory)) {
|
||||
NV_DRM_DEV_LOG_INFO(
|
||||
nv_dev,
|
||||
"Framebuffer memory not appropriate for scanout");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
|
||||
params.planes[i].offset = nv_fb->base.offsets[i];
|
||||
params.planes[i].pitch = nv_fb->base.pitches[i];
|
||||
@@ -164,6 +172,17 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
params.layout = (modifier & 0x10) ?
|
||||
NvKmsSurfaceMemoryLayoutBlockLinear :
|
||||
NvKmsSurfaceMemoryLayoutPitch;
|
||||
|
||||
// See definition of DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D, we are testing
|
||||
// 'c', the lossless compression field of the modifier
|
||||
if (params.layout == NvKmsSurfaceMemoryLayoutBlockLinear &&
|
||||
(modifier >> 23) & 0x7) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Cannot create FB from compressible surface allocation");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
params.log2GobsPerBlockY = modifier & 0xf;
|
||||
} else {
|
||||
params.explicit_layout = false;
|
||||
@@ -174,11 +193,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, ¶ms);
|
||||
if (nv_fb->pSurface == NULL) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Failed to create NvKmsKapiSurface");
|
||||
drm_framebuffer_cleanup(&nv_fb->base);
|
||||
return -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
drm_framebuffer_cleanup(&nv_fb->base);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
|
||||
|
||||
@@ -516,6 +516,7 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context;
|
||||
|
||||
nv_dma_fence_t *fence;
|
||||
nv_dma_resv_t *resv;
|
||||
|
||||
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
|
||||
|
||||
@@ -566,18 +567,20 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
goto fence_context_create_fence_failed;
|
||||
}
|
||||
|
||||
nv_dma_resv_lock(&nv_gem->resv, NULL);
|
||||
resv = nv_drm_gem_res_obj(nv_gem);
|
||||
|
||||
ret = nv_dma_resv_reserve_fences(&nv_gem->resv, 1, false);
|
||||
nv_dma_resv_lock(resv, NULL);
|
||||
|
||||
ret = nv_dma_resv_reserve_fences(resv, 1, false);
|
||||
if (ret == 0) {
|
||||
nv_dma_resv_add_excl_fence(&nv_gem->resv, fence);
|
||||
nv_dma_resv_add_excl_fence(resv, fence);
|
||||
} else {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to reserve fence. Error code: %d", ret);
|
||||
}
|
||||
|
||||
nv_dma_resv_unlock(&nv_gem->resv);
|
||||
nv_dma_resv_unlock(resv);
|
||||
|
||||
/* dma_resv_add_excl_fence takes its own reference to the fence. */
|
||||
nv_dma_fence_put(fence);
|
||||
|
||||
@@ -131,11 +131,11 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
|
||||
const struct nv_drm_gem_object *nv_gem_src);
|
||||
|
||||
static int __nv_drm_gem_nvkms_map(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct NvKmsKapiMemory *pMemory,
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory,
|
||||
uint64_t size)
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
|
||||
struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
|
||||
|
||||
if (!nv_dev->hasVideoMemory) {
|
||||
return 0;
|
||||
}
|
||||
@@ -153,7 +153,7 @@ static int __nv_drm_gem_nvkms_map(
|
||||
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
|
||||
(uintptr_t)nv_nvkms_memory->pPhysicalAddress,
|
||||
size);
|
||||
nv_nvkms_memory->base.base.size);
|
||||
|
||||
if (!nv_nvkms_memory->pWriteCombinedIORemapAddress) {
|
||||
NV_DRM_DEV_LOG_INFO(
|
||||
@@ -167,6 +167,22 @@ static int __nv_drm_gem_nvkms_map(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
}
|
||||
|
||||
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
@@ -176,10 +192,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_dev,
|
||||
nv_nvkms_memory->base.pMemory,
|
||||
nv_nvkms_memory,
|
||||
nv_nvkms_memory->base.base.size);
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -201,7 +214,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
sg_table = nv_drm_prime_pages_to_sg(nv_dev->dev,
|
||||
@@ -214,6 +227,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
|
||||
.free = __nv_drm_gem_nvkms_memory_free,
|
||||
.prime_dup = __nv_drm_gem_nvkms_prime_dup,
|
||||
.prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
|
||||
.mmap = __nv_drm_gem_nvkms_mmap,
|
||||
.handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
|
||||
.create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
|
||||
@@ -314,7 +328,7 @@ int nv_drm_dumb_create(
|
||||
* to use dumb buffers for software rendering, so they're not much use
|
||||
* without a CPU mapping.
|
||||
*/
|
||||
ret = __nv_drm_gem_nvkms_map(nv_dev, pMemory, nv_nvkms_memory, args->size);
|
||||
ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
nv_drm_gem_object_unreference_unlocked(&nv_nvkms_memory->base);
|
||||
goto fail;
|
||||
@@ -583,11 +597,13 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
int nv_drm_dumb_destroy(struct drm_file *file,
|
||||
struct drm_device *dev,
|
||||
uint32_t handle)
|
||||
{
|
||||
return drm_gem_handle_delete(file, handle);
|
||||
}
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -97,9 +97,11 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
|
||||
struct drm_device *dev, uint32_t handle,
|
||||
uint64_t *offset);
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
|
||||
int nv_drm_dumb_destroy(struct drm_file *file,
|
||||
struct drm_device *dev,
|
||||
uint32_t handle);
|
||||
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
|
||||
|
||||
struct drm_gem_object *nv_drm_gem_nvkms_prime_import(
|
||||
struct drm_device *dev,
|
||||
|
||||
@@ -92,9 +92,9 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vma->vm_flags &= ~VM_PFNMAP;
|
||||
vma->vm_flags &= ~VM_IO;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
nv_vm_flags_clear(vma, VM_PFNMAP);
|
||||
nv_vm_flags_clear(vma, VM_IO);
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -81,10 +81,13 @@ typedef struct dma_buf_map nv_sysio_map_t;
|
||||
static int nv_drm_gem_vmap(struct drm_gem_object *gem,
|
||||
nv_sysio_map_t *map)
|
||||
{
|
||||
map->vaddr = nv_drm_gem_prime_vmap(gem);
|
||||
if (map->vaddr == NULL) {
|
||||
void *vaddr = nv_drm_gem_prime_vmap(gem);
|
||||
if (vaddr == NULL) {
|
||||
return -ENOMEM;
|
||||
} else if (IS_ERR(vaddr)) {
|
||||
return PTR_ERR(vaddr);
|
||||
}
|
||||
map->vaddr = vaddr;
|
||||
map->is_iomem = true;
|
||||
return 0;
|
||||
}
|
||||
@@ -132,13 +135,8 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
|
||||
|
||||
/* Initialize the gem object */
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_dma_resv_init(&nv_gem->resv);
|
||||
|
||||
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_gem->base.resv = &nv_gem->resv;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(NV_DRM_DRIVER_HAS_GEM_FREE_OBJECT)
|
||||
@@ -212,8 +210,7 @@ void nv_drm_gem_prime_vunmap(struct drm_gem_object *gem, void *address)
|
||||
nv_dma_resv_t* nv_drm_gem_prime_res_obj(struct drm_gem_object *obj)
|
||||
{
|
||||
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(obj);
|
||||
|
||||
return &nv_gem->resv;
|
||||
return nv_drm_gem_res_obj(nv_gem);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -299,7 +296,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
vma->vm_flags &= ~VM_MAYWRITE;
|
||||
nv_vm_flags_clear(vma, VM_MAYWRITE);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@
|
||||
#include "nvidia-dma-resv-helper.h"
|
||||
#endif
|
||||
|
||||
#include "linux/dma-buf.h"
|
||||
|
||||
struct nv_drm_gem_object;
|
||||
|
||||
struct nv_drm_gem_object_funcs {
|
||||
@@ -71,7 +73,7 @@ struct nv_drm_gem_object {
|
||||
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_dma_resv_t resv;
|
||||
#endif
|
||||
};
|
||||
@@ -177,6 +179,17 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
|
||||
return drm_gem_handle_create(filp, &nv_gem->base, handle);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
return nv_gem->base.resv;
|
||||
#else
|
||||
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
const struct nv_drm_gem_object_funcs * const ops,
|
||||
|
||||
@@ -54,6 +54,8 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
@@ -129,3 +131,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
|
||||
|
||||
@@ -301,7 +301,7 @@ static void _q_flush_function(void *args)
|
||||
static void _raw_q_flush(nv_kthread_q_t *q)
|
||||
{
|
||||
nv_kthread_q_item_t q_item;
|
||||
DECLARE_COMPLETION(completion);
|
||||
DECLARE_COMPLETION_ONSTACK(completion);
|
||||
|
||||
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
|
||||
|
||||
|
||||
@@ -65,6 +65,9 @@
|
||||
static bool output_rounding_fix = true;
|
||||
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
|
||||
|
||||
static bool disable_vrr_memclk_switch = false;
|
||||
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);
|
||||
|
||||
/* These parameters are used for fault injection tests. Normally the defaults
|
||||
* should be used. */
|
||||
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
|
||||
@@ -91,6 +94,11 @@ NvBool nvkms_output_rounding_fix(void)
|
||||
return output_rounding_fix;
|
||||
}
|
||||
|
||||
NvBool nvkms_disable_vrr_memclk_switch(void)
|
||||
{
|
||||
return disable_vrr_memclk_switch;
|
||||
}
|
||||
|
||||
#define NVKMS_SYNCPT_STUBS_NEEDED
|
||||
|
||||
/*************************************************************************
|
||||
|
||||
@@ -40,11 +40,30 @@
|
||||
#include "nv_stdarg.h"
|
||||
|
||||
enum NvKmsSyncPtOp {
|
||||
/*
|
||||
* Call into Tegra's kernel nvhost driver, and allocate a syncpoint that can
|
||||
* be exclusively used by the caller. Internally, this operation will call
|
||||
* get() to set the initial refcount of the syncpoint to 1.
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_ALLOC,
|
||||
NVKMS_SYNCPT_OP_GET,
|
||||
/*
|
||||
* Decrease the refcount of an already allocated syncpoint. Once the
|
||||
* refcount drops to 0, the syncpoint will be returned to the free pool that
|
||||
* nvhost manages, so PUT can also be used to balance out an ALLOC.
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_PUT,
|
||||
/*
|
||||
* Extract syncpt id and thresh from the sync-file file descriptor
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
|
||||
/*
|
||||
* Create dma-fence from syncpt id and thresh value and create sync_file
|
||||
* file descriptor for the dma-fence handle created.
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
|
||||
/*
|
||||
* read syncpt minimum value of given syncpt
|
||||
*/
|
||||
NVKMS_SYNCPT_OP_READ_MINVAL,
|
||||
};
|
||||
|
||||
@@ -79,6 +98,8 @@ typedef struct {
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void);
|
||||
|
||||
NvBool nvkms_disable_vrr_memclk_switch(void);
|
||||
|
||||
void nvkms_call_rm (void *ops);
|
||||
void* nvkms_alloc (size_t size,
|
||||
NvBool zero);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -94,11 +94,10 @@ struct nvidia_p2p_params {
|
||||
} nvidia_p2p_params_t;
|
||||
|
||||
/*
|
||||
* Capability flag for users to detect
|
||||
* Macro for users to detect
|
||||
* driver support for persistent pages.
|
||||
*/
|
||||
extern int nvidia_p2p_cap_persistent_pages;
|
||||
#define NVIDIA_P2P_CAP_PERSISTENT_PAGES
|
||||
#define NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
|
||||
|
||||
/*
|
||||
* This API is not supported.
|
||||
@@ -173,11 +172,6 @@ struct nvidia_p2p_page_table {
|
||||
* A pointer to the function to be invoked when the pages
|
||||
* underlying the virtual address range are freed
|
||||
* implicitly.
|
||||
* If NULL, persistent pages will be returned.
|
||||
* This means the pages underlying the range of GPU virtual memory
|
||||
* will persist until explicitly freed by nvidia_p2p_put_pages().
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
* MIG-enabled devices and vGPU.
|
||||
* @param[in] data
|
||||
* A non-NULL opaque pointer to private data to be passed to the
|
||||
* callback function.
|
||||
@@ -190,12 +184,48 @@ struct nvidia_p2p_page_table {
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address, uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data), void *data);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Pin and make the pages underlying a range of GPU virtual memory
|
||||
* accessible to a third-party device. The pages will persist until
|
||||
* explicitly freed by nvidia_p2p_put_pages_persistent().
|
||||
*
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
* MIG-enabled devices and vGPU.
|
||||
*
|
||||
* This API only supports pinned, GPU-resident memory, such as that provided
|
||||
* by cudaMalloc().
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* Address must be aligned to the 64KB boundary.
|
||||
* @param[in] length
|
||||
* The length of the requested P2P mapping.
|
||||
* Length must be a multiple of 64KB.
|
||||
* @param[out] page_table
|
||||
* A pointer to an array of structures with P2P PTEs.
|
||||
* @param[in] flags
|
||||
* Must be set to zero for now.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -ENOTSUPP if the requested operation is not supported.
|
||||
* -ENOMEM if the driver failed to allocate memory or if
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages_persistent(uint64_t virtual_address,
|
||||
uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data),
|
||||
void *data);
|
||||
uint32_t flags);
|
||||
|
||||
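/*
 * Hypothetical sketch of the persistent-pages flow documented above: pin a
 * 64KB-aligned range, use it, then release it. The function name, variable
 * names, and error handling are assumptions; only the two nvidia_p2p_*
 * calls and struct nvidia_p2p_page_table come from this header.
 */
static int example_pin_and_release(uint64_t gpu_virtual_address, uint64_t length)
{
    struct nvidia_p2p_page_table *page_table = NULL;
    int ret;

    /* Address and length must be 64KB-aligned; flags must be zero for now. */
    ret = nvidia_p2p_get_pages_persistent(gpu_virtual_address, length,
                                          &page_table, 0);
    if (ret != 0)
        return ret;

    /* ... program the third-party device with the PTEs in page_table ... */

    return nvidia_p2p_put_pages_persistent(gpu_virtual_address, page_table, 0);
}
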
#define NVIDIA_P2P_DMA_MAPPING_VERSION 0x00020003
|
||||
|
||||
@@ -268,6 +298,8 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
* Release a set of pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] p2p_token
|
||||
* A token that uniquely identifies the P2P mapping.
|
||||
* @param[in] va_space
|
||||
@@ -282,10 +314,33 @@ int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token,
|
||||
uint32_t va_space, uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Release a set of persistent pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* @param[in] page_table
|
||||
* A pointer to the array of structures with P2P PTEs.
|
||||
* @param[in] flags
|
||||
* Must be set to zero for now.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages_persistent(uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table,
|
||||
uint32_t flags);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Free a third-party P2P page table. (This function is a no-op.)
|
||||
|
||||
@@ -284,8 +284,9 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
static void nv_mem_put_pages_common(int nc,
|
||||
struct sg_table *sg_head,
|
||||
void *context)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
@@ -302,8 +303,13 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
if (nv_mem_context->callback_task == current)
|
||||
return;
|
||||
|
||||
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table);
|
||||
if (nc) {
|
||||
ret = nvidia_p2p_put_pages_persistent(nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table, 0);
|
||||
} else {
|
||||
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
|
||||
nv_mem_context->page_table);
|
||||
}
|
||||
|
||||
#ifdef _DEBUG_ONLY_
|
||||
/* Here we expect an error in real life cases that should be ignored - not printed.
|
||||
@@ -318,6 +324,16 @@ static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
return;
|
||||
}
|
||||
|
||||
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
|
||||
{
|
||||
nv_mem_put_pages_common(0, sg_head, context);
|
||||
}
|
||||
|
||||
static void nv_mem_put_pages_nc(struct sg_table *sg_head, void *context)
|
||||
{
|
||||
nv_mem_put_pages_common(1, sg_head, context);
|
||||
}
|
||||
|
||||
static void nv_mem_release(void *context)
|
||||
{
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
@@ -396,8 +412,9 @@ static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
nv_mem_context->core_context = core_context;
|
||||
nv_mem_context->page_size = GPU_PAGE_SIZE;
|
||||
|
||||
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, NULL, NULL);
|
||||
ret = nvidia_p2p_get_pages_persistent(nv_mem_context->page_virt_start,
|
||||
nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, 0);
|
||||
if (ret < 0) {
|
||||
peer_err("error %d while calling nvidia_p2p_get_pages() with NULL callback\n", ret);
|
||||
return ret;
|
||||
@@ -407,13 +424,13 @@ static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
}
|
||||
|
||||
static struct peer_memory_client nv_mem_client_nc = {
|
||||
.acquire = nv_mem_acquire,
|
||||
.get_pages = nv_mem_get_pages_nc,
|
||||
.dma_map = nv_dma_map,
|
||||
.dma_unmap = nv_dma_unmap,
|
||||
.put_pages = nv_mem_put_pages,
|
||||
.get_page_size = nv_mem_get_page_size,
|
||||
.release = nv_mem_release,
|
||||
.acquire = nv_mem_acquire,
|
||||
.get_pages = nv_mem_get_pages_nc,
|
||||
.dma_map = nv_dma_map,
|
||||
.dma_unmap = nv_dma_unmap,
|
||||
.put_pages = nv_mem_put_pages_nc,
|
||||
.get_page_size = nv_mem_get_page_size,
|
||||
.release = nv_mem_release,
|
||||
};
|
||||
|
||||
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
|
||||
@@ -477,9 +494,6 @@ static int __init nv_mem_client_init(void)
|
||||
}
|
||||
|
||||
// The nc client enables support for persistent pages.
|
||||
// Thanks to this check, nvidia-peermem requires the new symbol from nvidia.ko, which
|
||||
// prevents users to unintentionally load this module with unsupported nvidia.ko.
|
||||
BUG_ON(!nvidia_p2p_cap_persistent_pages);
|
||||
strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
|
||||
strcpy(nv_mem_client_nc.version, DRV_VERSION);
|
||||
reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
|
||||
|
||||
@@ -1,29 +1,33 @@
/*******************************************************************************
Copyright (c) 2013 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __cla06fsubch_h__
#define __cla06fsubch_h__
#ifndef _cla06fsubch_h_
#define _cla06fsubch_h_

#define NVA06F_SUBCHANNEL_2D 3
#define NVA06F_SUBCHANNEL_3D 0
#define NVA06F_SUBCHANNEL_COMPUTE 1
#define NVA06F_SUBCHANNEL_COPY_ENGINE 4
#define NVA06F_SUBCHANNEL_I2M 2

#endif // {__cla06fsubch_h__}
#endif // _cla06fsubch_h_

@@ -1,25 +1,25 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef _cla16f_h_
#define _cla16f_h_
@@ -30,9 +30,48 @@ extern "C" {

#include "nvtypes.h"

#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
/* class KEPLER_CHANNEL_GPFIFO */
/*
* Documentation for KEPLER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
*/
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)

/* pio method data structure */
typedef volatile struct _cla16f_tag0 {
NvV32 Reserved00[0x7c0];
} NvA16FTypedef, KEPLER_ChannelGPFifoB;
#define NVA16F_TYPEDEF KEPLER_CHANNELChannelGPFifo
/* dma flow control data structure */
typedef volatile struct _cla16f_tag1 {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c];
} NvA16FControl, KeplerBControlGPFifo;
/* fields and values */
#define NVA16F_NUMBER_OF_SUBCHANNELS (8)
#define NVA16F_SET_OBJECT (0x00000000)
#define NVA16F_SET_OBJECT_NVCLASS 15:0
#define NVA16F_SET_OBJECT_ENGINE 20:16
#define NVA16F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVA16F_ILLEGAL (0x00000004)
#define NVA16F_ILLEGAL_HANDLE 31:0
#define NVA16F_NOP (0x00000008)
#define NVA16F_NOP_HANDLE 31:0
#define NVA16F_SEMAPHOREA (0x00000010)
@@ -100,6 +139,12 @@ extern "C" {
#define NVA16F_SET_REFERENCE_COUNT 31:0
#define NVA16F_WFI (0x00000078)
#define NVA16F_WFI_HANDLE 31:0
#define NVA16F_CRC_CHECK (0x0000007c)
#define NVA16F_CRC_CHECK_VALUE 31:0
#define NVA16F_YIELD (0x00000080)
#define NVA16F_YIELD_OP 1:0
#define NVA16F_YIELD_OP_NOP 0x00000000


/* GPFIFO entry format */
#define NVA16F_GP_ENTRY__SIZE 8
@@ -126,13 +171,28 @@ extern "C" {
#define NVA16F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003

/* dma method formats */
#define NVA16F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVA16F_DMA_METHOD_ADDRESS 11:0
#define NVA16F_DMA_SUBDEVICE_MASK 15:4
#define NVA16F_DMA_METHOD_SUBCHANNEL 15:13
#define NVA16F_DMA_TERT_OP 17:16
#define NVA16F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVA16F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVA16F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVA16F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVA16F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVA16F_DMA_METHOD_COUNT_OLD 28:18
#define NVA16F_DMA_METHOD_COUNT 28:16
#define NVA16F_DMA_IMMD_DATA 28:16
#define NVA16F_DMA_SEC_OP 31:29
#define NVA16F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVA16F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVA16F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVA16F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)

#define NVA16F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVA16F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVA16F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVA16F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVA16F_DMA_INCR_ADDRESS 11:0
#define NVA16F_DMA_INCR_SUBCHANNEL 15:13
@@ -140,7 +200,6 @@ extern "C" {
#define NVA16F_DMA_INCR_OPCODE 31:29
#define NVA16F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVA16F_DMA_INCR_DATA 31:0

/* dma non-incrementing method format */
#define NVA16F_DMA_NONINCR_ADDRESS 11:0
#define NVA16F_DMA_NONINCR_SUBCHANNEL 15:13
@@ -148,13 +207,45 @@ extern "C" {
#define NVA16F_DMA_NONINCR_OPCODE 31:29
#define NVA16F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVA16F_DMA_NONINCR_DATA 31:0

/* dma increment-once method format */
#define NVA16F_DMA_ONEINCR_ADDRESS 11:0
#define NVA16F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVA16F_DMA_ONEINCR_COUNT 28:16
#define NVA16F_DMA_ONEINCR_OPCODE 31:29
#define NVA16F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVA16F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVA16F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVA16F_DMA_IMMD_ADDRESS 11:0
#define NVA16F_DMA_IMMD_SUBCHANNEL 15:13
#define NVA16F_DMA_IMMD_DATA 28:16
#define NVA16F_DMA_IMMD_OPCODE 31:29
#define NVA16F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVA16F_DMA_ENDSEG_OPCODE 31:29
#define NVA16F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVA16F_DMA_ADDRESS 12:2
#define NVA16F_DMA_SUBCH 15:13
#define NVA16F_DMA_OPCODE3 17:16
#define NVA16F_DMA_OPCODE3_NONE (0x00000000)
#define NVA16F_DMA_COUNT 28:18
#define NVA16F_DMA_OPCODE 31:29
#define NVA16F_DMA_OPCODE_METHOD (0x00000000)
#define NVA16F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVA16F_DMA_DATA 31:0

#ifdef __cplusplus
}; /* extern "C" */

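As a side note on how the definitions in the two hunks above fit together: the subchannel indices from cla06fsubch.h select which engine a method targets, and the NVA16F_DMA_INCR_* fields from cla16f.h describe how an incrementing-method header is packed into a single pushbuffer dword. The minimal C sketch below is illustrative only and is not part of the diff; the helper names, the push_buf buffer, and the compute_class value are hypothetical, and the assumption that the method offset is encoded in dwords (offset >> 2) follows the usual GPFIFO convention rather than anything stated in these hunks.

/*
 * Illustrative sketch (not part of the diff): packing a SET_OBJECT method
 * for the compute subchannel using the field layouts shown above.
 */
#include <stdint.h>

#define NVA06F_SUBCHANNEL_COMPUTE    1            /* from cla06fsubch.h */
#define NVA16F_SET_OBJECT            (0x00000000) /* from cla16f.h */
#define NVA16F_DMA_INCR_OPCODE_VALUE (0x00000001) /* NVA16F_DMA_SEC_OP_INC_METHOD */

/* NVA16F_DMA_INCR_OPCODE is bits 31:29, _COUNT 28:16, _SUBCHANNEL 15:13,
 * _ADDRESS 11:0; the method offset is assumed to be encoded in dwords. */
static inline uint32_t nva16f_incr_header(uint32_t method, uint32_t subch,
                                          uint32_t count)
{
    return (NVA16F_DMA_INCR_OPCODE_VALUE << 29) |
           ((count & 0x1fff) << 16) |
           ((subch & 0x7) << 13) |
           ((method >> 2) & 0xfff);
}

/* Bind a (hypothetical) compute class to subchannel 1: one header dword
 * followed by one data dword carrying the class in NVA16F_SET_OBJECT_NVCLASS. */
static void emit_set_object(uint32_t *push_buf, uint32_t compute_class)
{
    push_buf[0] = nva16f_incr_header(NVA16F_SET_OBJECT,
                                     NVA06F_SUBCHANNEL_COMPUTE, 1);
    push_buf[1] = compute_class & 0xffff;
}
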
@@ -1,24 +1,26 @@
/*******************************************************************************
Copyright (c) 2014 NVidia Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clb069_h_
#define _clb069_h_

@@ -1,28 +1,28 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _clB06f_h_
|
||||
#define _clB06f_h_
|
||||
#ifndef _clb06f_h_
|
||||
#define _clb06f_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -30,10 +30,46 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class MAXWELL_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for MAXWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
*/
|
||||
#define MAXWELL_CHANNEL_GPFIFO_A (0x0000B06F)
|
||||
|
||||
/* class MAXWELL_CHANNEL_GPFIFO */
|
||||
#define NVB06F_TYPEDEF MAXWELL_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct _clb06f_tag0 {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} Nvb06FControl, MaxwellAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVB06F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVB06F_SET_OBJECT (0x00000000)
|
||||
#define NVB06F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVB06F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVB06F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVB06F_ILLEGAL (0x00000004)
|
||||
#define NVB06F_ILLEGAL_HANDLE 31:0
|
||||
#define NVB06F_NOP (0x00000008)
|
||||
#define NVB06F_NOP_HANDLE 31:0
|
||||
#define NVB06F_SEMAPHOREA (0x00000010)
|
||||
@@ -47,6 +83,8 @@ extern "C" {
|
||||
#define NVB06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
|
||||
#define NVB06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
|
||||
#define NVB06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
|
||||
#define NVB06F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
|
||||
#define NVB06F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
|
||||
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
|
||||
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
|
||||
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
|
||||
@@ -56,8 +94,22 @@ extern "C" {
|
||||
#define NVB06F_SEMAPHORED_RELEASE_SIZE 24:24
|
||||
#define NVB06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
|
||||
#define NVB06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
|
||||
|
||||
#define NVB06F_SEMAPHORED_REDUCTION 30:27
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_MIN 0x00000000
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_MAX 0x00000001
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_XOR 0x00000002
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_AND 0x00000003
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_OR 0x00000004
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_ADD 0x00000005
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_INC 0x00000006
|
||||
#define NVB06F_SEMAPHORED_REDUCTION_DEC 0x00000007
|
||||
#define NVB06F_SEMAPHORED_FORMAT 31:31
|
||||
#define NVB06F_SEMAPHORED_FORMAT_SIGNED 0x00000000
|
||||
#define NVB06F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVB06F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVB06F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVB06F_FB_FLUSH (0x00000024)
|
||||
#define NVB06F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
|
||||
// possible future MEM_OP features. MEM_OP_C/D have identical functionality
|
||||
// to the previous MEM_OP_A/B methods.
|
||||
@@ -84,10 +136,27 @@ extern "C" {
|
||||
#define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI 7:0
|
||||
#define NVB06F_SET_REFERENCE (0x00000050)
|
||||
#define NVB06F_SET_REFERENCE_COUNT 31:0
|
||||
#define NVB06F_WFI (0x00000078)
|
||||
#define NVB06F_WFI_SCOPE 0:0
|
||||
#define NVB06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVB06F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVB06F_CRC_CHECK (0x0000007c)
|
||||
#define NVB06F_CRC_CHECK_VALUE 31:0
|
||||
#define NVB06F_YIELD (0x00000080)
|
||||
#define NVB06F_YIELD_OP 1:0
|
||||
#define NVB06F_YIELD_OP_NOP 0x00000000
|
||||
#define NVB06F_YIELD_OP_PBDMA_TIMESLICE 0x00000001
|
||||
#define NVB06F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
|
||||
#define NVB06F_YIELD_OP_TSG 0x00000003
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVB06F_GP_ENTRY__SIZE 8
|
||||
#define NVB06F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVB06F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVB06F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVB06F_GP_ENTRY0_GET 31:2
|
||||
#define NVB06F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVB06F_GP_ENTRY1_GET_HI 7:0
|
||||
@@ -98,11 +167,38 @@ extern "C" {
|
||||
#define NVB06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVB06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVB06F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVB06F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVB06F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVB06F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVB06F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVB06F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVB06F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVB06F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVB06F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVB06F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVB06F_DMA_TERT_OP 17:16
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVB06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVB06F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVB06F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVB06F_DMA_METHOD_COUNT 28:16
|
||||
#define NVB06F_DMA_IMMD_DATA 28:16
|
||||
#define NVB06F_DMA_SEC_OP 31:29
|
||||
#define NVB06F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVB06F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVB06F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVB06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVB06F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVB06F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVB06F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVB06F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVB06F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVB06F_DMA_INCR_SUBCHANNEL 15:13
|
||||
@@ -132,9 +228,33 @@ extern "C" {
|
||||
#define NVB06F_DMA_IMMD_DATA 28:16
|
||||
#define NVB06F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVB06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVB06F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVB06F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVB06F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVB06F_DMA_ADDRESS 12:2
|
||||
#define NVB06F_DMA_SUBCH 15:13
|
||||
#define NVB06F_DMA_OPCODE3 17:16
|
||||
#define NVB06F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVB06F_DMA_COUNT 28:18
|
||||
#define NVB06F_DMA_OPCODE 31:29
|
||||
#define NVB06F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVB06F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVB06F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clB06F_h_ */
|
||||
#endif /* _clb06f_h_ */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define MAXWELL_DMA_COPY_A (0x0000B0B5)
|
||||
|
||||
#define NVB0B5_NOP (0x00000100)
|
||||
#define NVB0B5_NOP_PARAMETER 31:0
|
||||
#define NVB0B5_PM_TRIGGER (0x00000140)
|
||||
#define NVB0B5_PM_TRIGGER_V 31:0
|
||||
#define NVB0B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVB0B5_SET_SEMAPHORE_A_UPPER 7:0
|
||||
#define NVB0B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -183,9 +187,75 @@ extern "C" {
|
||||
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_QUARTER_GOB (0x0000000E)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4 (0x00000000)
|
||||
#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVB0B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVB0B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVB0B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVB0B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVB0B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVB0B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVB0B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVB0B5_SET_DST_LAYER_V 31:0
|
||||
#define NVB0B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVB0B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVB0B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_QUARTER_GOB (0x0000000E)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4 (0x00000000)
|
||||
#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVB0B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVB0B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVB0B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVB0B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVB0B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVB0B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVB0B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVB0B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVB0B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVB0B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVB0B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVB0B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVB0B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clb0b5_h
|
||||
|
||||
|
||||
@@ -1,25 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc06f_h_
|
||||
#define _clc06f_h_
|
||||
@@ -30,10 +30,47 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class PASCAL_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for PASCAL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
*
|
||||
* Note there is no .mfs file for this class.
|
||||
*/
|
||||
#define PASCAL_CHANNEL_GPFIFO_A (0x0000C06F)
|
||||
|
||||
/* class PASCAL_CHANNEL_GPFIFO_A */
|
||||
#define NVC06F_TYPEDEF PASCAL_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct Nvc06fControl_struct {
|
||||
NvU32 Ignored00[0x010]; /* 0000-003f*/
|
||||
NvU32 Put; /* put offset, read/write 0040-0043*/
|
||||
NvU32 Get; /* get offset, read only 0044-0047*/
|
||||
NvU32 Reference; /* reference value, read only 0048-004b*/
|
||||
NvU32 PutHi; /* high order put offset bits 004c-004f*/
|
||||
NvU32 Ignored01[0x002]; /* 0050-0057*/
|
||||
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
|
||||
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
|
||||
NvU32 GetHi; /* high order get offset bits 0060-0063*/
|
||||
NvU32 Ignored02[0x007]; /* 0064-007f*/
|
||||
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
|
||||
NvU32 Ignored04[0x001]; /* 0084-0087*/
|
||||
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored05[0x5c];
|
||||
} Nvc06fControl, PascalAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVC06F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVC06F_SET_OBJECT (0x00000000)
|
||||
#define NVC06F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVC06F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVC06F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVC06F_ILLEGAL (0x00000004)
|
||||
#define NVC06F_ILLEGAL_HANDLE 31:0
|
||||
#define NVC06F_NOP (0x00000008)
|
||||
#define NVC06F_NOP_HANDLE 31:0
|
||||
#define NVC06F_SEMAPHOREA (0x00000010)
|
||||
@@ -47,54 +84,33 @@ extern "C" {
|
||||
#define NVC06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
|
||||
#define NVC06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
|
||||
#define NVC06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
|
||||
#define NVC06F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
|
||||
#define NVC06F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
|
||||
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
|
||||
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
|
||||
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVC06F_GP_ENTRY__SIZE 8
|
||||
#define NVC06F_GP_ENTRY0_GET 31:2
|
||||
#define NVC06F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC06F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC06F_GP_ENTRY1_PRIV 8:8
|
||||
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LENGTH 30:10
|
||||
|
||||
/* dma incrementing method format */
|
||||
#define NVC06F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_INCR_COUNT 28:16
|
||||
#define NVC06F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC06F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
|
||||
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC06F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
|
||||
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC06F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVC06F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVC06F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_IMMD_DATA 28:16
|
||||
#define NVC06F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
|
||||
#define NVC06F_SEMAPHORED_RELEASE_WFI 20:20
|
||||
#define NVC06F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
|
||||
#define NVC06F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
|
||||
#define NVC06F_SEMAPHORED_RELEASE_SIZE 24:24
|
||||
#define NVC06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
|
||||
#define NVC06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
|
||||
#define NVC06F_SEMAPHORED_REDUCTION 30:27
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_MIN 0x00000000
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_MAX 0x00000001
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_XOR 0x00000002
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_AND 0x00000003
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_OR 0x00000004
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_ADD 0x00000005
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_INC 0x00000006
|
||||
#define NVC06F_SEMAPHORED_REDUCTION_DEC 0x00000007
|
||||
#define NVC06F_SEMAPHORED_FORMAT 31:31
|
||||
#define NVC06F_SEMAPHORED_FORMAT_SIGNED 0x00000000
|
||||
#define NVC06F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC06F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVC06F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVC06F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
|
||||
#define NVC06F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
|
||||
// specifying the page address for a targeted TLB invalidate and the uTLB for
|
||||
// a targeted REPLAY_CANCEL for UVM.
|
||||
@@ -153,19 +169,142 @@ extern "C" {
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
|
||||
// CLEAN_LINES is an alias for Tegra/GPU IP usage
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
// This B alias is confusing but it was missed as part of the update. Left here
|
||||
// for compatibility.
|
||||
#define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
|
||||
#define NVC06F_SET_REFERENCE (0x00000050)
|
||||
#define NVC06F_SET_REFERENCE_COUNT 31:0
|
||||
|
||||
// Syncpoint methods are only available on Tegra parts. Attempting to use
|
||||
// them on discrete GPUs will result in Host raising NV_PPBDMA_INTR_0_METHOD.
|
||||
#define NVC06F_SYNCPOINTA (0x00000070)
|
||||
#define NVC06F_SYNCPOINTA_PAYLOAD 31:0
|
||||
#define NVC06F_SYNCPOINTB (0x00000074)
|
||||
#define NVC06F_SYNCPOINTB_OPERATION 0:0
|
||||
#define NVC06F_SYNCPOINTB_OPERATION_WAIT 0x00000000
|
||||
#define NVC06F_SYNCPOINTB_OPERATION_INCR 0x00000001
|
||||
#define NVC06F_SYNCPOINTB_WAIT_SWITCH 4:4
|
||||
#define NVC06F_SYNCPOINTB_WAIT_SWITCH_DIS 0x00000000
|
||||
#define NVC06F_SYNCPOINTB_WAIT_SWITCH_EN 0x00000001
|
||||
#define NVC06F_SYNCPOINTB_SYNCPT_INDEX 19:8
|
||||
#define NVC06F_WFI (0x00000078)
|
||||
#define NVC06F_WFI_SCOPE 0:0
|
||||
#define NVC06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVC06F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVC06F_CRC_CHECK (0x0000007c)
|
||||
#define NVC06F_CRC_CHECK_VALUE 31:0
|
||||
#define NVC06F_YIELD (0x00000080)
|
||||
#define NVC06F_YIELD_OP 1:0
|
||||
#define NVC06F_YIELD_OP_NOP 0x00000000
|
||||
#define NVC06F_YIELD_OP_PBDMA_TIMESLICE 0x00000001
|
||||
#define NVC06F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
|
||||
#define NVC06F_YIELD_OP_TSG 0x00000003
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVC06F_GP_ENTRY__SIZE 8
|
||||
#define NVC06F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVC06F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVC06F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVC06F_GP_ENTRY0_GET 31:2
|
||||
#define NVC06F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVC06F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVC06F_GP_ENTRY1_PRIV 8:8
|
||||
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVC06F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVC06F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVC06F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
|
||||
/* dma method formats */
|
||||
#define NVC06F_DMA_METHOD_ADDRESS_OLD 12:2
|
||||
#define NVC06F_DMA_METHOD_ADDRESS 11:0
|
||||
#define NVC06F_DMA_SUBDEVICE_MASK 15:4
|
||||
#define NVC06F_DMA_METHOD_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_TERT_OP 17:16
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
|
||||
#define NVC06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
|
||||
#define NVC06F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
|
||||
#define NVC06F_DMA_METHOD_COUNT_OLD 28:18
|
||||
#define NVC06F_DMA_METHOD_COUNT 28:16
|
||||
#define NVC06F_DMA_IMMD_DATA 28:16
|
||||
#define NVC06F_DMA_SEC_OP 31:29
|
||||
#define NVC06F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
|
||||
#define NVC06F_DMA_SEC_OP_INC_METHOD (0x00000001)
|
||||
#define NVC06F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
|
||||
#define NVC06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
|
||||
#define NVC06F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
|
||||
#define NVC06F_DMA_SEC_OP_ONE_INC (0x00000005)
|
||||
#define NVC06F_DMA_SEC_OP_RESERVED6 (0x00000006)
|
||||
#define NVC06F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
|
||||
/* dma incrementing method format */
|
||||
#define NVC06F_DMA_INCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_INCR_COUNT 28:16
|
||||
#define NVC06F_DMA_INCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
|
||||
#define NVC06F_DMA_INCR_DATA 31:0
|
||||
/* dma non-incrementing method format */
|
||||
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_NONINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_NONINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
|
||||
#define NVC06F_DMA_NONINCR_DATA 31:0
|
||||
/* dma increment-once method format */
|
||||
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
|
||||
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_ONEINCR_COUNT 28:16
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
|
||||
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
|
||||
#define NVC06F_DMA_ONEINCR_DATA 31:0
|
||||
/* dma no-operation format */
|
||||
#define NVC06F_DMA_NOP (0x00000000)
|
||||
/* dma immediate-data format */
|
||||
#define NVC06F_DMA_IMMD_ADDRESS 11:0
|
||||
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
|
||||
#define NVC06F_DMA_IMMD_DATA 28:16
|
||||
#define NVC06F_DMA_IMMD_OPCODE 31:29
|
||||
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
|
||||
/* dma set sub-device mask format */
|
||||
#define NVC06F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
|
||||
/* dma store sub-device mask format */
|
||||
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
|
||||
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
|
||||
/* dma use sub-device mask format */
|
||||
#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
|
||||
#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
|
||||
/* dma end-segment format */
|
||||
#define NVC06F_DMA_ENDSEG_OPCODE 31:29
|
||||
#define NVC06F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
|
||||
/* dma legacy incrementing/non-incrementing formats */
|
||||
#define NVC06F_DMA_ADDRESS 12:2
|
||||
#define NVC06F_DMA_SUBCH 15:13
|
||||
#define NVC06F_DMA_OPCODE3 17:16
|
||||
#define NVC06F_DMA_OPCODE3_NONE (0x00000000)
|
||||
#define NVC06F_DMA_COUNT 28:18
|
||||
#define NVC06F_DMA_OPCODE 31:29
|
||||
#define NVC06F_DMA_OPCODE_METHOD (0x00000000)
|
||||
#define NVC06F_DMA_OPCODE_NONINC_METHOD (0x00000002)
|
||||
#define NVC06F_DMA_DATA 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define PASCAL_DMA_COPY_A (0x0000C0B5)
|
||||
|
||||
#define NVC0B5_NOP (0x00000100)
|
||||
#define NVC0B5_NOP_PARAMETER 31:0
|
||||
#define NVC0B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC0B5_PM_TRIGGER_V 31:0
|
||||
#define NVC0B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC0B5_SET_SEMAPHORE_A_UPPER 16:0
|
||||
#define NVC0B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -115,6 +119,10 @@ extern "C" {
|
||||
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
|
||||
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
|
||||
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC0B5_LAUNCH_DMA_RESERVED 31:28
|
||||
#define NVC0B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC0B5_OFFSET_IN_UPPER_UPPER 16:0
|
||||
#define NVC0B5_OFFSET_IN_LOWER (0x00000404)
|
||||
@@ -183,6 +191,68 @@ extern "C" {
|
||||
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC0B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC0B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC0B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC0B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC0B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC0B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC0B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC0B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC0B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC0B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC0B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC0B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC0B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC0B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC0B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC0B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC0B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC0B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC0B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC0B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC0B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC0B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC0B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC0B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2014 NVIDIA Corporation
|
||||
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define PASCAL_DMA_COPY_B (0x0000C1B5)
|
||||
|
||||
#define NVC1B5_NOP (0x00000100)
|
||||
#define NVC1B5_NOP_PARAMETER 31:0
|
||||
#define NVC1B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC1B5_PM_TRIGGER_V 31:0
|
||||
#define NVC1B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC1B5_SET_SEMAPHORE_A_UPPER 16:0
|
||||
#define NVC1B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -115,6 +119,14 @@ extern "C" {
|
||||
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
|
||||
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
|
||||
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
|
||||
#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
|
||||
#define NVC1B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC1B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC1B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC1B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC1B5_OFFSET_IN_UPPER_UPPER 16:0
|
||||
#define NVC1B5_OFFSET_IN_LOWER (0x00000404)
|
||||
@@ -183,6 +195,76 @@ extern "C" {
|
||||
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC1B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC1B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC1B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC1B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC1B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC1B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC1B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC1B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC1B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC1B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC1B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC1B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC1B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC1B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC1B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC1B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC1B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC1B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC1B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC1B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC1B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC1B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC1B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC1B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC1B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC1B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC1B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC1B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC1B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC1B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC1B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC1B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -32,6 +32,10 @@ extern "C" {
|
||||
|
||||
#define VOLTA_DMA_COPY_A (0x0000C3B5)
|
||||
|
||||
#define NVC3B5_NOP (0x00000100)
|
||||
#define NVC3B5_NOP_PARAMETER 31:0
|
||||
#define NVC3B5_PM_TRIGGER (0x00000140)
|
||||
#define NVC3B5_PM_TRIGGER_V 31:0
|
||||
#define NVC3B5_SET_SEMAPHORE_A (0x00000240)
|
||||
#define NVC3B5_SET_SEMAPHORE_A_UPPER 16:0
|
||||
#define NVC3B5_SET_SEMAPHORE_B (0x00000244)
|
||||
@@ -126,8 +130,6 @@ extern "C" {
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
|
||||
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
|
||||
#define NVC3B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC3B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC3B5_OFFSET_IN_UPPER (0x00000400)
|
||||
@@ -198,6 +200,76 @@ extern "C" {
|
||||
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC3B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC3B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC3B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC3B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC3B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC3B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC3B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC3B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC3B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC3B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC3B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC3B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC3B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC3B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC3B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC3B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC3B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC3B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC3B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC3B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC3B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC3B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC3B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC3B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC3B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC3B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC3B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC3B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC3B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC3B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC3B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC3B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
|
||||
97 kernel-open/nvidia-uvm/clcba2.h Normal file
@@ -0,0 +1,97 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clcba2_h_
|
||||
#define _clcba2_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HOPPER_SEC2_WORK_LAUNCH_A (0x0000CBA2)
|
||||
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI (0x00000400)
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO (0x00000404)
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI (0x00000408)
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO (0x0000040c)
|
||||
#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_DECRYPT_COPY_SIZE (0x00000410)
|
||||
#define NVCBA2_DECRYPT_COPY_SIZE_DATA 31:2
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI (0x00000414)
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO (0x00000418)
|
||||
#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI (0x0000041C)
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO (0x00000420)
|
||||
#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO_DATA 31:4
|
||||
#define NVCBA2_SEMAPHORE_A (0x00000440)
|
||||
#define NVCBA2_SEMAPHORE_A_UPPER 24:0
|
||||
#define NVCBA2_SEMAPHORE_B (0x00000444)
|
||||
#define NVCBA2_SEMAPHORE_B_LOWER 31:2
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER (0x00000448)
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER_DATA 31:0
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000044C)
|
||||
#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER_DATA 31:0
|
||||
#define NVCBA2_SEMAPHORE_D (0x00000450)
|
||||
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR 0:0
|
||||
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_DISABLE (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_ENABLE (0x00000001)
|
||||
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE 1:1
|
||||
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_32_BIT (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_64_BIT (0x00000001)
|
||||
#define NVCBA2_SEMAPHORE_D_TIMESTAMP 2:2
|
||||
#define NVCBA2_SEMAPHORE_D_TIMESTAMP_DISABLE (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_TIMESTAMP_ENABLE (0x00000001)
|
||||
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE 3:3
|
||||
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_FALSE (0x00000000)
|
||||
#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_TRUE (0x00000001)
|
||||
#define NVCBA2_EXECUTE (0x00000470)
|
||||
#define NVCBA2_EXECUTE_NOTIFY 0:0
|
||||
#define NVCBA2_EXECUTE_NOTIFY_DISABLE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ENABLE (0x00000001)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ON 1:1
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ON_END (0x00000000)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_ON_BEGIN (0x00000001)
|
||||
#define NVCBA2_EXECUTE_FLUSH_DISABLE 2:2
|
||||
#define NVCBA2_EXECUTE_FLUSH_DISABLE_FALSE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_FLUSH_DISABLE_TRUE (0x00000001)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_INTR 3:3
|
||||
#define NVCBA2_EXECUTE_NOTIFY_INTR_DISABLE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_NOTIFY_INTR_ENABLE (0x00000001)
|
||||
#define NVCBA2_EXECUTE_PAYLOAD_SIZE 4:4
|
||||
#define NVCBA2_EXECUTE_PAYLOAD_SIZE_32_BIT (0x00000000)
|
||||
#define NVCBA2_EXECUTE_PAYLOAD_SIZE_64_BIT (0x00000001)
|
||||
#define NVCBA2_EXECUTE_TIMESTAMP 5:5
|
||||
#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE (0x00000000)
|
||||
#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE (0x00000001)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clcba2_h
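The new HOPPER_SEC2_WORK_LAUNCH_A class above appears to define the methods used to submit decrypt-copy work to SEC2, ending with the NVCBA2_EXECUTE trigger whose single-bit flags select notification, interrupt, payload size, and timestamping. A hedged sketch of composing an EXECUTE value from those field definitions; the shift helper is an assumption, while the bit positions come from the defines above.

```c
#include <stdint.h>

/* Illustrative helper only: place a value at bit position lo. */
#define NVCBA2_SHIFT(lo, v) ((uint32_t)(v) << (lo))

static inline uint32_t nvcba2_execute_notify_intr_64bit(void)
{
    return NVCBA2_SHIFT(3, 0x00000001)    /* EXECUTE_NOTIFY_INTR_ENABLE  (3:3) */
         | NVCBA2_SHIFT(4, 0x00000001);   /* EXECUTE_PAYLOAD_SIZE_64_BIT (4:4) */
}
```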
|
||||
@@ -301,7 +301,7 @@ static void _q_flush_function(void *args)
|
||||
static void _raw_q_flush(nv_kthread_q_t *q)
|
||||
{
|
||||
nv_kthread_q_item_t q_item;
|
||||
DECLARE_COMPLETION(completion);
|
||||
DECLARE_COMPLETION_ONSTACK(completion);
|
||||
|
||||
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
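The one-line change above swaps DECLARE_COMPLETION for DECLARE_COMPLETION_ONSTACK, the variant intended for completions with automatic (stack) lifetime so that lockdep treats each instance as its own class. A minimal sketch of the pattern, assuming only `<linux/completion.h>`:

```c
#include <linux/completion.h>

static void example_wait_for_flush(void)
{
    /* On-stack completion: use the _ONSTACK variant, as in _raw_q_flush(). */
    DECLARE_COMPLETION_ONSTACK(done);

    /* ... hand &done to another context that eventually calls complete(&done) ... */
    wait_for_completion(&done);
}
```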
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
|
||||
@@ -84,7 +84,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
@@ -103,5 +105,11 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_tools.h"
|
||||
@@ -72,6 +73,11 @@ uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
|
||||
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
case UVM_FD_MM:
|
||||
UVM_ASSERT(ptr);
|
||||
BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
}
|
||||
@@ -82,6 +88,106 @@ uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
|
||||
return type;
|
||||
}
|
||||
|
||||
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
|
||||
|
||||
if (uvm_fd_type(filp, &ptr) == type)
|
||||
return ptr;
|
||||
else
|
||||
return NULL;
|
||||
}
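uvm_fd_type() and the new uvm_fd_get_type() above rely on the fd type being packed into the low bits of filp->private_data, which works because the pointed-to objects are aligned to at least (1 << UVM_FD_TYPE_BITS) bytes (see the BUILD_BUG_ONs). A hedged sketch of that encoding follows; the constant values are assumptions for illustration, not quoted from the UVM headers.

```c
/* Assumed values for the sketch; the real definitions live in the UVM headers. */
#define UVM_FD_TYPE_BITS 2UL
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

/* Low bits carry the uvm_fd_type_t, the remaining bits are the object pointer. */
static inline unsigned long example_fd_type(void *private_data)
{
    return (unsigned long)private_data & UVM_FD_TYPE_MASK;
}

static inline void *example_fd_ptr(void *private_data)
{
    return (void *)((unsigned long)private_data & ~UVM_FD_TYPE_MASK);
}
```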
|
||||
|
||||
static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_space_mm_t *va_space_mm;
|
||||
struct file *uvm_file;
|
||||
uvm_fd_type_t old_fd_type;
|
||||
struct mm_struct *mm;
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_file = fget(params->uvmFd);
|
||||
if (!uvm_file_is_nvidia_uvm(uvm_file)) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (uvm_fd_type(uvm_file, (void **)&va_space) != UVM_FD_VA_SPACE) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
// Tell userspace the MM FD is not required and it may be released
|
||||
// with no loss of functionality.
|
||||
if (!uvm_va_space_mm_enabled(va_space)) {
|
||||
status = NV_WARN_NOTHING_TO_DO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type != UVM_FD_UNINITIALIZED) {
|
||||
status = NV_ERR_IN_USE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
va_space_mm = &va_space->va_space_mm;
|
||||
uvm_spin_lock(&va_space_mm->lock);
|
||||
switch (va_space->va_space_mm.state) {
|
||||
// We only allow the va_space_mm to be initialised once. If
|
||||
// userspace passed the UVM FD to another process it is up to
|
||||
// userspace to ensure it also passes the UVM MM FD that
|
||||
// initialised the va_space_mm or arranges some other way to keep
|
||||
// a reference on the FD.
|
||||
case UVM_VA_SPACE_MM_STATE_ALIVE:
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto err_release_unlock;
|
||||
break;
|
||||
|
||||
// Once userspace has released the va_space_mm the GPU is
|
||||
// effectively dead and no new work can be started. We don't
|
||||
// support re-initializing once userspace has closed the FD.
|
||||
case UVM_VA_SPACE_MM_STATE_RELEASED:
|
||||
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
goto err_release_unlock;
|
||||
break;
|
||||
|
||||
// Keep the warnings at bay
|
||||
case UVM_VA_SPACE_MM_STATE_UNINITIALIZED:
|
||||
mm = va_space->va_space_mm.mm;
|
||||
if (!mm || !mmget_not_zero(mm)) {
|
||||
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
goto err_release_unlock;
|
||||
}
|
||||
|
||||
va_space_mm->state = UVM_VA_SPACE_MM_STATE_ALIVE;
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
break;
|
||||
}
|
||||
uvm_spin_unlock(&va_space_mm->lock);
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err_release_unlock:
|
||||
uvm_spin_unlock(&va_space_mm->lock);
|
||||
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
|
||||
|
||||
err:
|
||||
if (uvm_file)
|
||||
fput(uvm_file);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
|
||||
// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
|
||||
// lock will need to be taken.
|
||||
@@ -147,20 +253,44 @@ static void uvm_release_deferred(void *data)
|
||||
uvm_up_read(&g_uvm_global.pm.lock);
|
||||
}
|
||||
|
||||
static void uvm_mm_release(struct file *filp, struct file *uvm_file)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
|
||||
uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
|
||||
struct mm_struct *mm = va_space_mm->mm;
|
||||
|
||||
if (uvm_va_space_mm_enabled(va_space)) {
|
||||
uvm_va_space_mm_unregister(va_space);
|
||||
|
||||
if (uvm_va_space_mm_enabled(va_space))
|
||||
uvm_mmput(mm);
|
||||
|
||||
va_space_mm->mm = NULL;
|
||||
fput(uvm_file);
|
||||
}
|
||||
}
|
||||
|
||||
static int uvm_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
void *ptr;
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_fd_type_t fd_type;
|
||||
int ret;
|
||||
|
||||
fd_type = uvm_fd_type(filp, (void **)&va_space);
|
||||
fd_type = uvm_fd_type(filp, &ptr);
|
||||
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
|
||||
if (fd_type == UVM_FD_UNINITIALIZED) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
return 0;
|
||||
}
|
||||
else if (fd_type == UVM_FD_MM) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
uvm_mm_release(filp, (struct file *)ptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
|
||||
va_space = (uvm_va_space_t *)ptr;
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = NULL;
|
||||
|
||||
@@ -696,7 +826,7 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
// of removing CPU mappings in the parent on fork()+exec(). Users can call
|
||||
// madvise(MDV_DOFORK) if the child process requires access to the
|
||||
// allocation.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY;
|
||||
nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY);
|
||||
|
||||
vma->vm_ops = &uvm_vm_ops_managed;
|
||||
|
||||
@@ -756,6 +886,13 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool uvm_vma_is_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
return vma->vm_ops == &uvm_vm_ops_disabled ||
|
||||
vma->vm_ops == &uvm_vm_ops_managed ||
|
||||
vma->vm_ops == &uvm_vm_ops_semaphore_pool;
|
||||
}
|
||||
|
||||
static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_mmap(filp, vma));
|
||||
@@ -804,6 +941,9 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
|
||||
else
|
||||
status = NV_OK;
|
||||
}
|
||||
else if (old_fd_type == UVM_FD_MM) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
@@ -827,6 +967,7 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
return 0;
|
||||
|
||||
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_INITIALIZE, uvm_api_initialize);
|
||||
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_MM_INITIALIZE, uvm_api_mm_initialize);
|
||||
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS, uvm_api_pageable_mem_access);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS_ON_GPU, uvm_api_pageable_mem_access_on_gpu);
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
#ifndef _UVM_H_
|
||||
#define _UVM_H_
|
||||
|
||||
#define UVM_API_LATEST_REVISION 7
|
||||
#define UVM_API_LATEST_REVISION 8
|
||||
|
||||
#if !defined(UVM_API_REVISION)
|
||||
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
|
||||
@@ -410,6 +410,12 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
// location will have their range group association changed to
|
||||
// UVM_RANGE_GROUP_ID_NONE.
|
||||
//
|
||||
// If the Confidential Computing feature is enabled in the system, any VA
|
||||
// ranges allocated using UvmAllocSemaphorePool and owned by this GPU will be
|
||||
// unmapped from all GPUs and the CPU. UvmFree must still be called on those
|
||||
// ranges to reclaim the VA. See UvmAllocSemaphorePool to determine which GPU
|
||||
// is considered the owner.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unregister.
|
||||
@@ -1094,10 +1100,12 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
|
||||
// Creates a new mapping in the virtual address space of the process, populates
|
||||
// it at the specified preferred location, maps it on the provided list of
|
||||
// processors if feasible and associates the range with the given range group.
|
||||
// If the preferredLocationUuid is the UUID of the CPU, preferred location is
|
||||
// set to all CPU nodes allowed by the global and thread memory policies.
|
||||
//
|
||||
// This API is equivalent to the following code sequence:
|
||||
// UvmMemMap(base, length);
|
||||
// UvmSetPreferredLocation(base, length, preferredLocationUuid);
|
||||
// UvmSetPreferredLocation(base, length, preferredLocationUuid, -1);
|
||||
// for (i = 0; i < accessedByCount; i++) {
|
||||
// UvmSetAccessedBy(base, length, &accessedByUuids[i]);
|
||||
// }
|
||||
@@ -1262,6 +1270,12 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
//
|
||||
// The VA range can be unmapped and freed via a call to UvmFree.
|
||||
//
|
||||
// If the Confidential Computing feature is enabled in the system, at least one
|
||||
// GPU must be provided in the perGpuAttribs array. The first GPU in the array
|
||||
// is considered the owning GPU. If the owning GPU is unregistered via
|
||||
// UvmUnregisterGpu, this allocation will no longer be usable.
|
||||
// See UvmUnregisterGpu.
|
||||
//
|
||||
// Arguments:
|
||||
// base: (INPUT)
|
||||
// Base address of the virtual address range.
|
||||
@@ -1298,6 +1312,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
|
||||
// or caching is requested on more than one GPU.
|
||||
// The Confidential Computing feature is enabled and the perGpuAttribs
|
||||
// list is empty.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The current process is not the one which called UvmInitialize, and
|
||||
@@ -1444,7 +1460,7 @@ NV_STATUS UvmMigrate(void *base,
|
||||
NV_STATUS UvmMigrate(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
NvU32 preferredCpuMemoryNode);
|
||||
NvS32 preferredCpuMemoryNode);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -1537,7 +1553,7 @@ NV_STATUS UvmMigrateAsync(void *base,
|
||||
NV_STATUS UvmMigrateAsync(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
NvU32 preferredCpuMemoryNode,
|
||||
NvS32 preferredCpuMemoryNode,
|
||||
void *semaphoreAddress,
|
||||
NvU32 semaphorePayload);
|
||||
#endif
|
||||
@@ -2217,11 +2233,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// supported by the specified processor.
|
||||
//
|
||||
// The virtual address range specified by (base, length) must have been
|
||||
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
|
||||
// system-allocated pageable memory. If the input range is pageable memory and
|
||||
// at least one GPU in the system supports transparent access to pageable
|
||||
// memory, the behavior described below does not take effect and the preferred
|
||||
// location of the pages in the given range does not change.
|
||||
// allocated via a call to either UvmAlloc or UvmMemMap (managed memory), or be
|
||||
// supported system-allocated pageable memory. If the input range corresponds to
|
||||
// a file backed shared mapping and at least one GPU in the system supports
|
||||
// transparent access to pageable memory, the behavior below is not guaranteed.
|
||||
//
|
||||
// If any pages in the VA range are associated with a range group that was made
|
||||
// non-migratable via UvmPreventMigrationRangeGroups, then those pages are
|
||||
@@ -2240,17 +2255,17 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// not cause a migration if a mapping for that page from that processor can be
|
||||
// established without migrating the page.
|
||||
//
|
||||
// When a page migrates away from its preferred location, the mapping on the
|
||||
// preferred location's processor is cleared so that the next access from that
|
||||
// processor will cause a fault and migrate the page back to its preferred
|
||||
// location. In other words, a page is mapped on the preferred location's
|
||||
// processor only if the page is in its preferred location. Thus, when the
|
||||
// preferred location changes, mappings to pages in the given range are removed
|
||||
// from the new preferred location if the pages are resident in a different
|
||||
// processor. Note that if the preferred location's processor is a GPU, then a
|
||||
// mapping from that GPU to a page in the VA range is only created if a GPU VA
|
||||
// space has been registered for that GPU and the page is in its preferred
|
||||
// location.
|
||||
// When a page that was allocated via either UvmAlloc or UvmMemMap migrates away
|
||||
// from its preferred location, the mapping on the preferred location's
|
||||
// processor is cleared so that the next access from that processor will cause a
|
||||
// fault and migrate the page back to its preferred location. In other words, a
|
||||
// page is mapped on the preferred location's processor only if the page is in
|
||||
// its preferred location. Thus, when the preferred location changes, mappings
|
||||
// to pages in the given range are removed from the new preferred location if
|
||||
// the pages are resident in a different processor. Note that if the preferred
|
||||
// location's processor is a GPU, then a mapping from that GPU to a page in the
|
||||
// VA range is only created if a GPU VA space has been registered for that GPU
|
||||
// and the page is in its preferred location.
|
||||
//
|
||||
// If read duplication has been enabled for any pages in this VA range and
|
||||
// UvmPreventMigrationRangeGroups has not been called on the range group that
|
||||
@@ -2263,7 +2278,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
//
|
||||
// If the preferred location processor is present in the accessed-by list of any
|
||||
// of the pages in this VA range, then the migration and mapping policies
|
||||
// associated with associated with the accessed-by list.
|
||||
// associated with this API override those associated with the accessed-by list.
|
||||
//
|
||||
// The state set by this API can be cleared either by calling
|
||||
// UvmUnsetPreferredLocation for the same VA range or by calling
|
||||
@@ -2284,35 +2299,66 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location.
|
||||
//
|
||||
// preferredCpuNumaNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
|
||||
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
|
||||
// allowed by the global and thread memory policies. This argument is
|
||||
// ignored if preferredLocationUuid refers to a GPU or the given virtual
|
||||
// address range corresponds to managed memory. If NUMA is not enabled,
|
||||
// only 0 or -1 is allowed.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// base and length are not properly aligned, or the range does not
|
||||
// represent a valid UVM allocation, or the range is pageable memory and
|
||||
// the system does not support accessing pageable memory, or the range
|
||||
// does not represent a supported Operating System allocation.
|
||||
// One of the following occurred:
|
||||
// - base and length are not properly aligned.
|
||||
// - The range does not represent a valid UVM allocation.
|
||||
// - The range is pageable memory and the system does not support
|
||||
// accessing pageable memory.
|
||||
// - The range does not represent a supported Operating System
|
||||
// allocation.
|
||||
//
|
||||
// NV_ERR_OUT_OF_RANGE:
|
||||
// The VA range exceeds the largest virtual address supported by the
|
||||
// specified processor.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// preferredLocationUuid is neither the UUID of the CPU nor the UUID of
|
||||
// a GPU that was registered by this process. Or at least one page in
|
||||
// VA range belongs to a non-migratable range group and the specified
|
||||
// UUID represents a fault-capable GPU. Or preferredLocationUuid is the
|
||||
// UUID of a non-fault-capable GPU and at least one page in the VA range
|
||||
// belongs to a non-migratable range group and another non-fault-capable
|
||||
// GPU is in the accessed-by list of the same page but P2P support
|
||||
// between both GPUs has not been enabled.
|
||||
// One of the following occurred:
|
||||
// - preferredLocationUuid is neither the UUID of the CPU nor the UUID
|
||||
// of a GPU that was registered by this process.
|
||||
// - At least one page in VA range belongs to a non-migratable range
|
||||
// group and the specified UUID represents a fault-capable GPU.
|
||||
// - preferredLocationUuid is the UUID of a non-fault-capable GPU and at
|
||||
// least one page in the VA range belongs to a non-migratable range
|
||||
// group and another non-fault-capable GPU is in the accessed-by list
|
||||
// of the same page but P2P support between both GPUs has not been
|
||||
// enabled.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// One of the following occurred:
|
||||
// - preferredLocationUuid is the UUID of a CPU and preferredCpuNumaNode
|
||||
// refers to a registered GPU.
|
||||
// - preferredCpuNumaNode is invalid and preferredLocationUuid is the
|
||||
// UUID of the CPU.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The UVM file descriptor is associated with another process and the
|
||||
// input virtual range corresponds to system-allocated pageable memory.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(7)
|
||||
NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *preferredLocationUuid);
|
||||
#else
|
||||
NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *preferredLocationUuid,
|
||||
NvS32 preferredCpuNumaNode);
|
||||
#endif
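A hedged usage sketch of the revision-8 entry point documented above; it assumes uvm.h is built with UVM_API_REVISION >= 8 and that UvmInitialize has already succeeded. Error handling is left to the caller.

```c
#include "uvm.h"

/* Prefer CPU residency with no specific NUMA node: -1 selects all CPU nodes
 * allowed by the global and thread memory policies. */
static NV_STATUS prefer_cpu_any_node(void *base, NvLength length,
                                     const NvProcessorUuid *cpuUuid)
{
    return UvmSetPreferredLocation(base, length, cpuUuid, -1);
}
```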
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmUnsetPreferredLocation
|
||||
@@ -2326,10 +2372,9 @@ NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
//
|
||||
// The virtual address range specified by (base, length) must have been
|
||||
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
|
||||
// system-allocated pageable memory. If the input range is pageable memory and
|
||||
// at least one GPU in the system supports transparent access to pageable
|
||||
// memory, the behavior described below does not take effect and the preferred
|
||||
// location of the pages in the given range does not change.
|
||||
// system-allocated pageable memory. If the input range corresponds to a file
|
||||
// backed shared mapping and at least one GPU in the system supports transparent
|
||||
// access to pageable memory, the behavior below is not guaranteed.
|
||||
//
|
||||
// If the VA range is associated with a non-migratable range group, then that
|
||||
// association is cleared. i.e. the pages in this VA range have their range
|
||||
@@ -2348,10 +2393,18 @@ NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// base and length are not properly aligned or the range does not
|
||||
// represent a valid UVM allocation, or the range is pageable memory and
|
||||
// the system does not support accessing pageable memory, or the range
|
||||
// does not represent a supported Operating System allocation.
|
||||
// One of the following occurred:
|
||||
// - base and length are not properly aligned or the range does not
|
||||
// represent a valid UVM allocation.
|
||||
// - The range is pageable memory and the system does not support
|
||||
// accessing pageable memory.
|
||||
// - The range does not represent a supported Operating System
|
||||
// allocation.
|
||||
// - The range contains both managed and pageable memory allocations.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The UVM file descriptor is associated with another process and the
|
||||
// input virtual range corresponds to system-allocated pageable memory.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
@@ -2632,13 +2685,34 @@ NV_STATUS UvmDisableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// UVM was not initialized before calling this function.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmGetFileDescriptor(UvmFileDescriptor *returnedFd);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmGetMmFileDescriptor
|
||||
//
|
||||
// Returns the UVM file descriptor currently being used to keep the
|
||||
// memory management context valid. The data type of the returned file
|
||||
// descriptor is platform specific.
|
||||
//
|
||||
// If UvmInitialize has not yet been called, an error is returned.
|
||||
//
|
||||
// Arguments:
|
||||
// returnedFd: (OUTPUT)
|
||||
// A platform specific file descriptor.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// returnedFd is NULL.
|
||||
//
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// UVM was not initialized before calling this function.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// This file descriptor is not required on this platform.
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmGetMmFileDescriptor(UvmFileDescriptor *returnedFd);
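A short, hedged sketch of calling the new UvmGetMmFileDescriptor() entry point; treating NV_ERR_NOT_SUPPORTED as "no MM file descriptor is needed on this platform" follows from the error description above.

```c
static NV_STATUS query_mm_fd(UvmFileDescriptor *fd_out)
{
    NV_STATUS status = UvmGetMmFileDescriptor(fd_out);

    /* Per the documentation, this platform may simply not require the FD. */
    if (status == NV_ERR_NOT_SUPPORTED)
        status = NV_OK;

    return status;
}
```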
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmIs8Supported
|
||||
//
|
||||
@@ -3761,7 +3835,6 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// Read spans more than a single target process allocation.
|
||||
//
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
|
||||
void *buffer,
|
||||
|
||||
@@ -49,11 +49,13 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// A single top level PDE on Ada covers 128 TB and that's the minimum size
|
||||
// that can be used.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
// Not all units on Ada support 49-bit addressing, including those which
|
||||
|
||||
@@ -47,14 +47,16 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// A single top level PDE on Ampere covers 128 TB and that's the minimum
|
||||
// size that can be used.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->flat_vidmem_va_base = 136 * UVM_SIZE_1TB;
|
||||
parent_gpu->flat_sysmem_va_base = 256 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
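The two hunks above replace open-coded 1 TiB multiplications with a UVM_SIZE_1TB helper. A plausible definition is sketched below; this is an assumption for illustration, not quoted from the source.

```c
/* Assumed definition: one tebibyte as a 64-bit constant, keeping expressions
 * such as 384 * UVM_SIZE_1TB in 64-bit arithmetic. */
#define UVM_SIZE_1TB (1ULL << 40)
```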
|
||||
|
||||
|
||||
@@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
return true;
|
||||
|
||||
if (uvm_channel_is_proxy(push->channel)) {
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
if (dst.is_virtual) {
|
||||
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
|
||||
return false;
|
||||
@@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
return false;
|
||||
}
|
||||
|
||||
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
|
||||
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
|
||||
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
|
||||
@@ -177,13 +180,19 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
// irrespective of the virtualization mode.
|
||||
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return;
|
||||
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
|
||||
@@ -201,21 +201,20 @@ static bool uvm_api_range_invalid_64k(NvU64 base, NvU64 length)
|
||||
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_64K);
|
||||
}
|
||||
|
||||
// Returns true if the interval [start, start + length -1] is entirely covered
|
||||
// by vmas.
|
||||
//
|
||||
// LOCKING: mm->mmap_lock must be held in at least read mode.
|
||||
bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 length);
|
||||
typedef enum
|
||||
{
|
||||
UVM_API_RANGE_TYPE_MANAGED,
|
||||
UVM_API_RANGE_TYPE_HMM,
|
||||
UVM_API_RANGE_TYPE_ATS,
|
||||
UVM_API_RANGE_TYPE_INVALID
|
||||
} uvm_api_range_type_t;
|
||||
|
||||
// Check that the interval [base, base + length) is fully covered by UVM
|
||||
// managed ranges (NV_OK is returned), or (if ATS is enabled and mm != NULL)
|
||||
// fully covered by valid vmas (NV_WARN_NOTHING_TO_DO is returned), or (if HMM
|
||||
// is enabled and mm != NULL) fully covered by valid vmas (NV_OK is returned).
|
||||
// Any other input results in a return status of NV_ERR_INVALID_ADDRESS.
|
||||
// If the interval [base, base + length) is fully covered by VMAs which all have
|
||||
// the same uvm_api_range_type_t, that range type is returned.
|
||||
//
|
||||
// LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
|
||||
// mm->mmap_lock must also be held in at least read mode.
|
||||
NV_STATUS uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
|
||||
uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
|
||||
|
||||
NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *filp);
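The declaration change above turns uvm_api_range_type_check() from an NV_STATUS-returning check into one that returns the new uvm_api_range_type_t. A hedged sketch of how a caller might adapt; everything except the names quoted from the hunk is a placeholder.

```c
static NV_STATUS example_check_range(uvm_va_space_t *va_space,
                                     struct mm_struct *mm,
                                     NvU64 base,
                                     NvU64 length)
{
    uvm_api_range_type_t type = uvm_api_range_type_check(va_space, mm, base, length);

    if (type == UVM_API_RANGE_TYPE_INVALID)
        return NV_ERR_INVALID_ADDRESS;

    /* MANAGED, HMM and ATS ranges are all acceptable to this caller. */
    return NV_OK;
}
```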
|
||||
|
||||
@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
|
||||
|
||||
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_init_va_space(va_space);
|
||||
}
|
||||
@@ -57,6 +59,10 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
return uvm_ats_ibm_add_gpu(parent_gpu);
|
||||
}
|
||||
else if (UVM_ATS_SVA_SUPPORTED()) {
|
||||
if (g_uvm_global.ats.enabled)
|
||||
return uvm_ats_sva_add_gpu(parent_gpu);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
@@ -71,6 +77,10 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
uvm_ats_ibm_remove_gpu(parent_gpu);
|
||||
}
|
||||
else if (UVM_ATS_SVA_SUPPORTED()) {
|
||||
if (g_uvm_global.ats.enabled)
|
||||
uvm_ats_sva_remove_gpu(parent_gpu);
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
@@ -87,6 +97,8 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
status = uvm_ats_sva_bind_gpu(gpu_va_space);
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -100,6 +112,8 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_unbind_gpu(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
uvm_ats_sva_unbind_gpu(gpu_va_space);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
@@ -126,6 +140,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
|
||||
|
||||
if (status == NV_OK)
|
||||
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
|
||||
@@ -148,6 +164,8 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
|
||||
else if (UVM_ATS_SVA_SUPPORTED())
|
||||
uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
|
||||
|
||||
@@ -28,8 +28,21 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "uvm_lock.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -37,10 +50,15 @@ typedef struct
|
||||
// indexed by gpu->id. This mask is protected by the VA space lock.
|
||||
uvm_processor_mask_t registered_gpu_va_spaces;
|
||||
|
||||
// Protects racing invalidates in the VA space while hmm_range_fault() is
|
||||
// being called in ats_compute_residency_mask().
|
||||
uvm_rw_semaphore_t lock;
|
||||
|
||||
union
|
||||
{
|
||||
uvm_ibm_va_space_t ibm;
|
||||
|
||||
uvm_sva_va_space_t sva;
|
||||
};
|
||||
} uvm_ats_va_space_t;
|
||||
|
||||
@@ -58,6 +76,7 @@ typedef struct
|
||||
{
|
||||
uvm_ibm_gpu_va_space_t ibm;
|
||||
|
||||
uvm_sva_gpu_va_space_t sva;
|
||||
};
|
||||
} uvm_ats_gpu_va_space_t;
|
||||
|
||||
@@ -90,6 +109,8 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
// LOCKING: mmap_lock must be lockable.
|
||||
// VA space lock must be lockable.
|
||||
// gpu_va_space->gpu must be retained.
|
||||
// mm must be retained with uvm_va_space_mm_retain() iff
|
||||
// UVM_ATS_SVA_SUPPORTED() is 1
|
||||
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
|
||||
|
||||
@@ -20,74 +20,33 @@
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_tools.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
|
||||
// these experimental parameters. These are intended to help guide that policy.
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
|
||||
"Max order of pages (2^N) to prefetch on replayable ATS faults");
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
|
||||
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
|
||||
|
||||
// Expand the fault region to the naturally-aligned region with order given by
|
||||
// the module parameters, clamped to the vma containing fault_addr (if any).
|
||||
// Note that this means the region contains fault_addr but may not begin at
|
||||
// fault_addr.
|
||||
static void expand_fault_region(struct mm_struct *mm,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_client_type_t client_type,
|
||||
unsigned long *start,
|
||||
unsigned long *size)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned int order;
|
||||
unsigned long outer, aligned_start, aligned_size;
|
||||
|
||||
*start = fault_addr;
|
||||
*size = PAGE_SIZE;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
|
||||
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
|
||||
else
|
||||
order = uvm_exp_perf_prefetch_ats_order_replayable;
|
||||
|
||||
if (order == 0)
|
||||
return;
|
||||
|
||||
vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
|
||||
if (!vma)
|
||||
return;
|
||||
|
||||
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
|
||||
|
||||
aligned_size = (1UL << order) * PAGE_SIZE;
|
||||
|
||||
aligned_start = fault_addr & ~(aligned_size - 1);
|
||||
|
||||
*start = max(vma->vm_start, aligned_start);
|
||||
outer = min(vma->vm_end, aligned_start + aligned_size);
|
||||
*size = outer - *start;
|
||||
}
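expand_fault_region() above rounds the faulting address down to a naturally aligned window of 2^order pages and then clamps it to the enclosing vma. A small standalone example of that arithmetic (the order and addresses are made up for illustration):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t page_size     = 4096;
    const unsigned order         = 9;                            /* 2^9 pages */
    const uint64_t aligned_size  = (1ULL << order) * page_size;  /* 2 MiB     */
    const uint64_t fault_addr    = 0x7f1234567000ULL;
    const uint64_t aligned_start = fault_addr & ~(aligned_size - 1);

    /* The kernel code then clamps [aligned_start, aligned_start + aligned_size)
     * to [vma->vm_start, vma->vm_end). */
    printf("prefetch window: [0x%llx, 0x%llx)\n",
           (unsigned long long)aligned_start,
           (unsigned long long)(aligned_start + aligned_size));
    return 0;
}
```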
|
||||
|
||||
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_fault_client_type_t client_type)
|
||||
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
NV_STATUS status;
|
||||
NvU64 start;
|
||||
NvU64 length;
|
||||
NvU64 user_space_start;
|
||||
NvU64 user_space_length;
|
||||
|
||||
// Request uvm_migrate_pageable() to touch the corresponding page after
|
||||
// population.
|
||||
@@ -96,17 +55,18 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// 2) guest physical -> host physical
|
||||
//
|
||||
// The overall ATS translation will fault if either of those translations is
|
||||
// invalid. The get_user_pages() call above handles translation #1, but not
|
||||
// #2. We don't know if we're running as a guest, but in case we are we can
|
||||
// force that translation to be valid by touching the guest physical address
|
||||
// from the CPU. If the translation is not valid then the access will cause
|
||||
// a hypervisor fault. Note that dma_map_page() can't establish mappings
|
||||
// used by GPU ATS SVA translations. GPU accesses to host physical addresses
|
||||
// obtained as a result of the address translation request uses the CPU
|
||||
// address space instead of the IOMMU address space since the translated
|
||||
// host physical address isn't necessarily an IOMMU address. The only way to
|
||||
// establish guest physical to host physical mapping in the CPU address
|
||||
// space is to touch the page from the CPU.
|
||||
// invalid. The pin_user_pages() call within uvm_migrate_pageable() call
|
||||
// below handles translation #1, but not #2. We don't know if we're running
|
||||
// as a guest, but in case we are we can force that translation to be valid
|
||||
// by touching the guest physical address from the CPU. If the translation
|
||||
// is not valid then the access will cause a hypervisor fault. Note that
|
||||
// dma_map_page() can't establish mappings used by GPU ATS SVA translations.
|
||||
// GPU accesses to host physical addresses obtained as a result of the
|
||||
// address translation request use the CPU address space instead of the
|
||||
// IOMMU address space since the translated host physical address isn't
|
||||
// necessarily an IOMMU address. The only way to establish guest physical to
|
||||
// host physical mapping in the CPU address space is to touch the page from
|
||||
// the CPU.
|
||||
//
|
||||
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
|
||||
// VM_WRITE, meaning that the mappings are all granted write access on any
|
||||
@@ -117,23 +77,22 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = gpu_va_space->gpu->parent->id,
|
||||
.dst_node_id = -1,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.user_space_start = &start,
|
||||
.user_space_length = &length,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.populate_on_cpu_alloc_failures = true,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
|
||||
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
//
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
// populate and map the faulting region on the GPU. We want to do this only
|
||||
// on the first touch. That is, pages which are not already mapped. So, we
|
||||
@@ -148,114 +107,448 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *current_entry,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate)
|
||||
static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 addr,
|
||||
size_t size,
|
||||
uvm_fault_client_type_t client_type)
|
||||
{
|
||||
NvU64 gmmu_region_base;
|
||||
bool in_gmmu_region;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_fault_access_type_t service_access_type;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
else
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
|
||||
|
||||
if (!ats_invalidate->write_faults_in_batch) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
|
||||
ats_invalidate->write_faults_in_batch = true;
|
||||
}
|
||||
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
|
||||
}
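// Note: the batch opened lazily above is closed and submitted later by
// uvm_ats_invalidate_tlbs(), which also clears write_faults_in_batch (see the
// comment on its declaration further down in this change).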
|
||||
|
||||
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
int residency = uvm_gpu_numa_node(gpu);
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
unsigned short mode;
|
||||
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
|
||||
// It's safe to read vma_policy since the mmap_lock is held in at least read
|
||||
// mode in this path.
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
|
||||
if (!vma_policy)
|
||||
goto done;
|
||||
|
||||
mode = vma_policy->mode;
|
||||
|
||||
if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
|
||||
int home_node = NUMA_NO_NODE;
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
|
||||
if ((mode != MPOL_PREFERRED) && (vma_policy->home_node != NUMA_NO_NODE))
|
||||
home_node = vma_policy->home_node;
|
||||
#endif
|
||||
|
||||
// Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
|
||||
// in the list of preferred nodes, else prefer the closest_cpu_numa_node
|
||||
// to the GPU if closest_cpu_numa_node is in the list of preferred
|
||||
// nodes. Fall back to the first node in the preferred set if all else fails.
|
||||
if (home_node != NUMA_NO_NODE) {
|
||||
residency = home_node;
|
||||
}
|
||||
else if (!node_isset(residency, vma_policy->nodes)) {
|
||||
int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
|
||||
|
||||
if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
|
||||
residency = gpu->parent->closest_cpu_numa_node;
|
||||
else
|
||||
residency = first_node(vma_policy->nodes);
|
||||
}
|
||||
|
||||
if (!nodes_empty(vma_policy->nodes))
|
||||
ats_context->prefetch_state.has_preferred_location = true;
|
||||
}
|
||||
|
||||
// Update gpu if residency is not the faulting gpu.
|
||||
if (residency != uvm_gpu_numa_node(gpu))
|
||||
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
|
||||
|
||||
done:
|
||||
#else
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
#endif
|
||||
|
||||
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
|
||||
ats_context->residency_node = residency;
|
||||
}
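// Illustrative sketch (not part of this change): the selection order
// implemented above, with the kernel nodemask replaced by a plain bitmask so
// the sketch stands alone. Node numbers are assumed to fit in one unsigned
// long and preferred_mask is assumed to be non-empty.
static int sketch_pick_residency_node(int gpu_node,
                                      int home_node,
                                      int closest_cpu_node,
                                      unsigned long preferred_mask)
{
    if (home_node >= 0)
        return home_node;                        // 1) explicit home node
    if (preferred_mask & (1UL << gpu_node))
        return gpu_node;                         // 2) faulting GPU, if preferred
    if (closest_cpu_node >= 0 && (preferred_mask & (1UL << closest_cpu_node)))
        return closest_cpu_node;                 // 3) CPU node closest to the GPU
    return __builtin_ctzl(preferred_mask);       // 4) first preferred node
}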
|
||||
|
||||
static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
|
||||
{
|
||||
*start = max(vma->vm_start, (unsigned long) base);
|
||||
*end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
|
||||
{
|
||||
UVM_ASSERT(addr >= base);
|
||||
UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
|
||||
|
||||
return (addr - base) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
// start and end must be aligned to PAGE_SIZE and must fall within
|
||||
// [base, base + UVM_VA_BLOCK_SIZE]
|
||||
static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
|
||||
{
|
||||
// base can be greater than, less than or equal to the start of a VMA.
|
||||
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
|
||||
|
||||
UVM_ASSERT(start < end);
|
||||
UVM_ASSERT(PAGE_ALIGNED(start));
|
||||
UVM_ASSERT(PAGE_ALIGNED(end));
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
|
||||
return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
|
||||
{
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
|
||||
get_range_in_vma(vma, base, &start, &end);
|
||||
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
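// Illustrative sketch (not part of this change): worked example of the
// page-index math above, assuming PAGE_SIZE == 4096 and UVM_VA_BLOCK_SIZE ==
// 2MiB (512 pages).
static unsigned int sketch_cpu_page_index(unsigned long base, unsigned long addr)
{
    return (unsigned int)((addr - base) / 4096UL);
}

// For base = 0x7f4000200000 and a VMA clamped to
// [0x7f4000210000, 0x7f4000300000), the resulting region covers page indices
// [16, 256) within the block, i.e. 240 pages.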
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
|
||||
uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
|
||||
|
||||
// The following write lock protects against concurrent invalidates while
|
||||
// hmm_range_fault() is being called in ats_compute_residency_mask().
|
||||
uvm_down_write(&va_space->ats.lock);
|
||||
|
||||
mmu_interval_set_seq(mni, cur_seq);
|
||||
|
||||
uvm_up_write(&va_space->ats.lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
|
||||
const struct mmu_notifier_range *range,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
|
||||
}
|
||||
|
||||
static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
|
||||
{
|
||||
.invalidate = uvm_ats_invalidate_notifier_entry,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
|
||||
ats_context->prefetch_state.first_touch = true;
|
||||
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
|
||||
get_range_in_vma(vma, base, &start, &end);
|
||||
|
||||
vma_region = uvm_ats_region_from_start_end(start, end);
|
||||
|
||||
range.notifier = &ats_context->prefetch_state.notifier;
|
||||
range.start = start;
|
||||
range.end = end;
|
||||
range.hmm_pfns = ats_context->prefetch_state.pfns;
|
||||
range.default_flags = 0;
|
||||
range.pfn_flags_mask = 0;
|
||||
range.dev_private_owner = NULL;
|
||||
|
||||
ats_context->prefetch_state.va_space = va_space;
|
||||
|
||||
// mmu_interval_notifier_insert() will try to acquire mmap_lock for write
|
||||
// and will deadlock since mmap_lock is already held for read in this path.
|
||||
// This is prevented by calling __mmu_notifier_register() during va_space
|
||||
// creation. See the comment in uvm_mmu_notifier_register() for more
|
||||
// details.
|
||||
ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
|
||||
if (ret)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
while (true) {
|
||||
range.notifier_seq = mmu_interval_read_begin(range.notifier);
|
||||
ret = hmm_range_fault(&range);
|
||||
if (ret == -EBUSY)
|
||||
continue;
|
||||
if (ret) {
|
||||
status = errno_to_nv_status(ret);
|
||||
UVM_ASSERT(status != NV_OK);
|
||||
break;
|
||||
}
|
||||
|
||||
uvm_down_read(&va_space->ats.lock);
|
||||
|
||||
// Pages may have been freed or re-allocated after hmm_range_fault() is
|
||||
// called. So the PTE might point to a different page or nothing. In the
|
||||
// memory hot-unplug case it is not safe to call page_to_nid() on the
|
||||
// page as the struct page itself may have been freed. To protect
|
||||
// against these cases, uvm_ats_invalidate_notifier_entry() blocks on va_space
|
||||
// ATS write lock for concurrent invalidates since va_space ATS lock is
|
||||
// held for read in this path.
|
||||
if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
|
||||
break;
|
||||
|
||||
uvm_up_read(&va_space->ats.lock);
|
||||
}
|
||||
|
||||
if (status == NV_OK) {
|
||||
for_each_va_block_page_in_region(page_index, vma_region) {
|
||||
unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
|
||||
|
||||
if (pfn & HMM_PFN_VALID) {
|
||||
struct page *page = hmm_pfn_to_page(pfn);
|
||||
|
||||
if (page_to_nid(page) == ats_context->residency_node)
|
||||
uvm_page_mask_set(residency_mask, page_index);
|
||||
|
||||
ats_context->prefetch_state.first_touch = false;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_up_read(&va_space->ats.lock);
|
||||
}
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#endif
|
||||
|
||||
return status;
|
||||
}
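// Illustrative sketch (not part of this change): a standalone analogue of the
// retry loop above. The kernel primitives are mmu_interval_read_begin(),
// hmm_range_fault() and mmu_interval_read_retry(); here they are replaced by
// a plain sequence counter that an "invalidation" would bump.
static unsigned long sketch_invalidate_seq;

static void sketch_take_consistent_snapshot(void)
{
    for (;;) {
        unsigned long seq = sketch_invalidate_seq;

        // ... build the snapshot here (hmm_range_fault() in the real code) ...

        if (seq == sketch_invalidate_seq)
            break;              // no invalidation raced with us; snapshot holds
        // otherwise drop the snapshot and start over
    }
}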
|
||||
|
||||
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_page_mask_t *faulted_mask)
|
||||
{
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return;
|
||||
|
||||
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
|
||||
faulted_mask,
|
||||
uvm_va_block_region_from_mask(NULL, faulted_mask),
|
||||
max_prefetch_region,
|
||||
residency_mask,
|
||||
bitmap_tree,
|
||||
prefetch_mask);
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
ats_context->prefetch_state.first_touch &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
|
||||
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_region_t subregion;
|
||||
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
UVM_ASSERT(g_uvm_global.ats.enabled);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
UVM_ASSERT(current_entry->fault_access_type ==
|
||||
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
|
||||
uvm_page_mask_zero(faults_serviced_mask);
|
||||
uvm_page_mask_zero(reads_serviced_mask);
|
||||
|
||||
service_access_type = current_entry->fault_access_type;
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
return status;
|
||||
|
||||
// ATS lookups are disabled on all addresses within the same
|
||||
// UVM_GMMU_ATS_GRANULARITY as existing GMMU mappings (see documentation in
|
||||
// uvm_mmu.h). User mode is supposed to reserve VAs as appropriate to
|
||||
// prevent any system memory allocations from falling within the NO_ATS
|
||||
// range of other GMMU mappings, so this shouldn't happen during normal
|
||||
// operation. However, since this scenario may lead to infinite fault loops,
|
||||
// we handle it by canceling the fault.
|
||||
//
|
||||
// TODO: Bug 2103669: Remove redundant VA range lookups
|
||||
gmmu_region_base = UVM_ALIGN_DOWN(current_entry->fault_address, UVM_GMMU_ATS_GRANULARITY);
|
||||
in_gmmu_region = !uvm_va_space_range_empty(current_entry->va_space,
|
||||
gmmu_region_base,
|
||||
gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1);
|
||||
if (in_gmmu_region) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
else {
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
service_access_type,
|
||||
current_entry->fault_source.client_type);
|
||||
if (!(vma->vm_flags & VM_WRITE)) {
|
||||
// If VMA doesn't have write permissions, all write faults are fatal.
|
||||
// Try servicing such faults for read iff they are also present in
|
||||
// read_fault_mask. This is because for replayable faults, if there are
|
||||
// pending read accesses on the same page, we have to service them
|
||||
// before we can cancel the write/atomic faults. So we try with read
|
||||
// fault access type even though these write faults are fatal.
|
||||
if (ats_context->client_type == UVM_FAULT_CLIENT_TYPE_GPC)
|
||||
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
|
||||
else
|
||||
uvm_page_mask_zero(write_fault_mask);
|
||||
}
|
||||
|
||||
// Do not flag prefetch faults as fatal unless something fatal happened
|
||||
if (status == NV_ERR_INVALID_ADDRESS) {
|
||||
if (current_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH) {
|
||||
current_entry->is_fatal = true;
|
||||
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// Compute cancel mode for replayable faults
|
||||
if (current_entry->is_replayable) {
|
||||
if (service_access_type == UVM_FAULT_ACCESS_TYPE_READ || in_gmmu_region)
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
else
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC;
|
||||
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
|
||||
|
||||
// If there are pending read accesses on the same page, we have to
|
||||
// service them before we can cancel the write/atomic faults. So we
|
||||
// retry with read fault access type.
|
||||
if (!in_gmmu_region &&
|
||||
current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ &&
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
current_entry->fault_source.client_type);
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
|
||||
UVM_FAULT_ACCESS_TYPE_WRITE :
|
||||
UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
// If read accesses are also invalid, cancel the fault. If a
|
||||
// different error code is returned, exit
|
||||
if (status == NV_ERR_INVALID_ADDRESS)
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
else if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
}
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (vma->vm_flags & VM_WRITE) {
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
|
||||
// The Linux kernel never invalidates TLB entries on mapping
|
||||
// permission upgrade. This is a problem if the GPU has cached
|
||||
// entries with the old permission. The GPU will re-fetch the entry
|
||||
// if the PTE is invalid and page size is not 4K (this is the case
|
||||
// on P9). However, if a page gets upgraded from R/O to R/W and GPU
|
||||
// has the PTEs cached with R/O permissions we will enter an
|
||||
// infinite loop because we just forward the fault to the Linux
|
||||
// kernel and it will see that the permissions in the page table are
|
||||
// correct. Therefore, we flush TLB entries on ATS write faults.
|
||||
flush_tlb_write_faults(gpu_va_space, start, length, client_type);
|
||||
}
|
||||
else {
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
|
||||
}
|
||||
|
||||
// Do not fail overall fault servicing due to logical errors
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
// The Linux kernel never invalidates TLB entries on mapping permission
|
||||
// upgrade. This is a problem if the GPU has cached entries with the old
|
||||
// permission. The GPU will re-fetch the entry if the PTE is invalid and
|
||||
// page size is not 4K (this is the case on P9). However, if a page gets
|
||||
// upgraded from R/O to R/W and GPU has the PTEs cached with R/O
|
||||
// permissions we will enter an infinite loop because we just forward the
|
||||
// fault to the Linux kernel and it will see that the permissions in the
|
||||
// page table are correct. Therefore, we flush TLB entries on ATS write
|
||||
// faults.
|
||||
if (!current_entry->is_fatal && current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ) {
|
||||
if (!ats_invalidate->write_faults_in_batch) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
|
||||
ats_invalidate->write_faults_in_batch = true;
|
||||
}
|
||||
// Remove write faults from read_fault_mask
|
||||
uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);
|
||||
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch,
|
||||
current_entry->fault_address,
|
||||
PAGE_SIZE,
|
||||
PAGE_SIZE,
|
||||
UVM_MEMBAR_NONE);
|
||||
for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next)
|
||||
{
|
||||
uvm_va_range_t *prev;
|
||||
NvU64 gmmu_region_base = UVM_ALIGN_DOWN(address, UVM_GMMU_ATS_GRANULARITY);
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
if (next) {
|
||||
if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
|
||||
return true;
|
||||
|
||||
prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
|
||||
}
|
||||
else {
|
||||
// No VA range exists after address, so check the last VA range in the
|
||||
// tree.
|
||||
prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
|
||||
}
|
||||
|
||||
return prev && (prev->node.end >= gmmu_region_base);
|
||||
}
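// Illustrative sketch (not part of this change): the containment test above
// reduces to an interval-overlap check against the granule holding the
// address. The 512MiB granularity below is only a stand-in; the real value is
// UVM_GMMU_ATS_GRANULARITY from uvm_mmu.h.
static int sketch_overlaps_gmmu_granule(unsigned long long address,
                                        unsigned long long range_start,
                                        unsigned long long range_end)
{
    unsigned long long granularity = 512ULL * 1024 * 1024;
    unsigned long long base = address & ~(granularity - 1);

    return range_start <= (base + granularity - 1) && range_end >= base;
}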
|
||||
|
||||
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate,
|
||||
uvm_tracker_t *out_tracker)
|
||||
@@ -287,3 +580,4 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,10 +25,31 @@
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *current_entry,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate);
|
||||
// Service ATS faults in the range [base, base + UVM_VA_BLOCK_SIZE); the
// service type for individual pages in the range is requested through the
// page masks set in ats_context->read_fault_mask/write_fault_mask. base must
// be aligned to UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring
// that the faulting addresses fall completely within the VMA, that they don't
// overlap a GMMU region (see uvm_ats_check_in_gmmu_region()), and for
// handling any errors returned by this function (fault cancellations, etc.).
|
||||
//
|
||||
// Returns the fault service status in ats_context->faults_serviced_mask. In
|
||||
// addition, ats_context->reads_serviced_mask returns whether read servicing
|
||||
// worked on write faults iff the read service was also requested in the
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. A status other than NV_OK indicates a
// system-global fault servicing failure.
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
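// A typical call sequence (an illustrative outline, not mandated by this
// header): fill read_fault_mask/write_fault_mask and client_type in
// ats_context from the batch of faults that landed in
// [base, base + UVM_VA_BLOCK_SIZE) for this vma, call uvm_ats_service_faults(),
// then on NV_OK compare faults_serviced_mask (and reads_serviced_mask for
// write faults on read-only VMAs) against the requested masks to decide which
// faults can be replayed and which must be cancelled.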
|
||||
|
||||
// Return whether there are any VA ranges (and thus GMMU mappings) within the
|
||||
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
|
||||
|
||||
// This function performs pending TLB invalidations for ATS and clears the
|
||||
// ats_invalidate->write_faults_in_batch flag
|
||||
|
||||
156
kernel-open/nvidia-uvm/uvm_ats_sva.c
Normal file
@@ -0,0 +1,156 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#if UVM_ATS_SVA_SUPPORTED()
|
||||
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
|
||||
// reference required for the iommu_sva_bind_device() call. This header is not
|
||||
// present in all the supported versions. Instead of adding a conftest just for
|
||||
// this header file, use UVM_ATS_SVA_SUPPORTED().
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
// iommu_sva_bind_device() removed the drvdata parameter with commit
|
||||
// 942fd5435dccb273f90176b046ae6bbba60cfbd8 (10/31/2022).
|
||||
#if defined(NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG)
|
||||
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm, NULL)
|
||||
#else
|
||||
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
|
||||
#endif
|
||||
|
||||
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
|
||||
return errno_to_nv_status(ret);
|
||||
}
|
||||
|
||||
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
struct iommu_sva *iommu_handle;
|
||||
struct pci_dev *pci_dev = gpu_va_space->gpu->parent->pci_dev;
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
|
||||
struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
|
||||
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
|
||||
UVM_ASSERT(mm);
|
||||
|
||||
// The mmput() below may trigger the kernel's mm teardown with exit_mmap()
|
||||
// and uvm_va_space_mm_shutdown() and uvm_vm_close_managed() in that path
|
||||
// will try to grab the va_space lock and deadlock if va_space was already
|
||||
// locked.
|
||||
uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
|
||||
|
||||
// iommu_sva_bind_device() requires the mm reference to be acquired. Since
// the mm is already retained, mm is still valid but may be inactive because
// mm_users can be zero: UVM doesn't use mm_users and maintains a separate
// refcount (retained_count) for the mm in va_space_mm. See the block comment
// in va_space_mm.c for more details. So, return an error if mm_users is zero.
|
||||
if (!mmget_not_zero(mm))
|
||||
return NV_ERR_PAGE_TABLE_NOT_AVAIL;
|
||||
|
||||
// Multiple calls for the same {pci_dev, mm} pair are refcounted by the ARM
|
||||
// SMMU Layer.
|
||||
iommu_handle = UVM_IOMMU_SVA_BIND_DEVICE(&pci_dev->dev, mm);
|
||||
if (IS_ERR(iommu_handle)) {
|
||||
status = errno_to_nv_status(PTR_ERR(iommu_handle));
|
||||
goto out;
|
||||
}
|
||||
|
||||
// If this is not the first bind of the gpu in the mm, then the previously
|
||||
// stored iommu_handle in the gpu_va_space must match the handle returned by
|
||||
// iommu_sva_bind_device().
|
||||
if (sva_gpu_va_space->iommu_handle) {
|
||||
UVM_ASSERT(sva_gpu_va_space->iommu_handle == iommu_handle);
|
||||
nv_kref_get(&sva_gpu_va_space->kref);
|
||||
}
|
||||
else {
|
||||
sva_gpu_va_space->iommu_handle = iommu_handle;
|
||||
nv_kref_init(&sva_gpu_va_space->kref);
|
||||
}
|
||||
|
||||
out:
|
||||
mmput(mm);
|
||||
return status;
|
||||
}
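// Illustrative sketch (not part of this change): a standalone analogue of the
// handle refcounting above. The first bind stores the handle with a count of
// one, later binds for the same {device, mm} pair only take a reference, and
// the handle is dropped once the count returns to zero.
struct sketch_sva_binding {
    void *handle;
    unsigned int refs;
};

static void sketch_sva_bind(struct sketch_sva_binding *b, void *handle)
{
    if (b->handle) {
        b->refs++;          // subsequent bind: same handle, extra reference
    }
    else {
        b->handle = handle; // first bind: store the handle
        b->refs = 1;
    }
}

static void sketch_sva_unbind(struct sketch_sva_binding *b)
{
    if (b->handle && --b->refs == 0)
        b->handle = NULL;   // last unbind releases the handle
}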
|
||||
|
||||
static void uvm_sva_reset_iommu_handle(nv_kref_t *nv_kref)
|
||||
{
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = container_of(nv_kref, uvm_sva_gpu_va_space_t, kref);
|
||||
sva_gpu_va_space->iommu_handle = NULL;
|
||||
}
|
||||
|
||||
void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
|
||||
|
||||
// ARM SMMU layer decrements the refcount for the {pci_dev, mm} pair.
|
||||
// The actual unbind happens only when the refcount reaches zero.
|
||||
if (sva_gpu_va_space->iommu_handle) {
|
||||
iommu_sva_unbind_device(sva_gpu_va_space->iommu_handle);
|
||||
nv_kref_put(&sva_gpu_va_space->kref, uvm_sva_reset_iommu_handle);
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
NvU32 pasid;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
|
||||
|
||||
// A successful iommu_sva_bind_device() should have preceded this call.
|
||||
UVM_ASSERT(sva_gpu_va_space->iommu_handle);
|
||||
|
||||
pasid = iommu_sva_get_pasid(sva_gpu_va_space->iommu_handle);
|
||||
if (pasid == IOMMU_PASID_INVALID)
|
||||
status = errno_to_nv_status(ENODEV);
|
||||
else
|
||||
gpu_va_space->ats.pasid = pasid;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
gpu_va_space->ats.pasid = -1U;
|
||||
}
|
||||
|
||||
#endif // UVM_ATS_SVA_SUPPORTED()
|
||||
112
kernel-open/nvidia-uvm/uvm_ats_sva.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __UVM_ATS_SVA_H__
|
||||
#define __UVM_ATS_SVA_H__
|
||||
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_forward_decl.h"
|
||||
|
||||
#include <linux/iommu.h>
|
||||
|
||||
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
|
||||
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
|
||||
// conftest-able. We instead look for the presence of ioasid_get() or
|
||||
// mm_pasid_set(). ioasid_get() was added in the same patch series as
|
||||
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_set() was added in the
|
||||
// same patch as the removal of ioasid_get(). We assume the presence of
|
||||
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
|
||||
// present.
|
||||
//
|
||||
// arm_smmu_sva_bind() was added with commit
|
||||
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
|
||||
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
|
||||
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
|
||||
// and added mm_pasid_set().
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
|
||||
#define UVM_ATS_SVA_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_SVA_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int placeholder;
|
||||
} uvm_sva_va_space_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Reference count for the iommu_handle
|
||||
nv_kref_t kref;
|
||||
struct iommu_sva *iommu_handle;
|
||||
} uvm_sva_gpu_va_space_t;
|
||||
|
||||
#if UVM_ATS_SVA_SUPPORTED()
|
||||
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// LOCKING: mmap_lock must be lockable
|
||||
// VA space lock must not be held.
|
||||
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// LOCKING: VA space lock must not be held.
|
||||
void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// LOCKING: None
|
||||
NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
|
||||
|
||||
// LOCKING: None
|
||||
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
|
||||
#else
|
||||
static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
|
||||
{
|
||||
|
||||
}
|
||||
#endif // UVM_ATS_SVA_SUPPORTED
|
||||
|
||||
#endif // __UVM_ATS_SVA_H__
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_rm_mem.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
#define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
|
||||
#define CE_TEST_MEM_END_SIZE 32
|
||||
@@ -53,6 +54,11 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
uvm_push_t push;
|
||||
bool is_proxy;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
|
||||
@@ -67,7 +73,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
is_proxy = uvm_channel_is_proxy(push.channel);
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy);
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy).address;
|
||||
|
||||
// All of the following CE transfers are done from a single (L)CE and
|
||||
// disabling pipelining is enough to order them when needed. Only push_end
|
||||
@@ -75,7 +81,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
|
||||
// Initialize to a bad value
|
||||
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;
|
||||
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
@@ -84,7 +90,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
|
||||
// Set the first buffer to 1
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy);
|
||||
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy).address;
|
||||
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);
|
||||
|
||||
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
|
||||
@@ -92,9 +98,9 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
if (dst == CE_TEST_MEM_COUNT)
|
||||
dst_va = host_mem_gpu_va;
|
||||
else
|
||||
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy);
|
||||
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy).address;
|
||||
|
||||
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
|
||||
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;
|
||||
|
||||
// The first memcpy needs to be non-pipelined as otherwise the previous
|
||||
// memset/memcpy to the source may not be done yet.
|
||||
@@ -168,6 +174,11 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
uvm_push_t push;
|
||||
NvU32 value;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
|
||||
@@ -176,7 +187,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel));
|
||||
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel)).address;
|
||||
|
||||
for (i = 0; i < REDUCTIONS; ++i) {
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
@@ -334,6 +345,16 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
// If physical accesses aren't supported, silently convert to virtual to
|
||||
// test the flat mapping.
|
||||
TEST_CHECK_RET(gpu_verif_addr.is_virtual);
|
||||
|
||||
if (!src.is_virtual)
|
||||
src = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(src.aperture, src.address));
|
||||
|
||||
if (!dst.is_virtual)
|
||||
dst = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(dst.aperture, dst.address));
|
||||
|
||||
// Memset src with the appropriate element size, then memcpy to dst and from
|
||||
// dst to the verif location (physical sysmem).
|
||||
|
||||
@@ -375,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
|
||||
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
bool is_proxy_va_space;
|
||||
bool is_proxy_va_space = false;
|
||||
uvm_gpu_address_t gpu_verif_addr;
|
||||
void *cpu_verif_addr;
|
||||
uvm_mem_t *verif_mem = NULL;
|
||||
@@ -383,17 +404,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
uvm_mem_t *gpu_uvm_mem = NULL;
|
||||
uvm_rm_mem_t *sys_rm_mem = NULL;
|
||||
uvm_rm_mem_t *gpu_rm_mem = NULL;
|
||||
uvm_gpu_address_t gpu_addresses[4];
|
||||
NvU64 gpu_va;
|
||||
size_t size;
|
||||
uvm_gpu_address_t gpu_addresses[4] = {0};
|
||||
size_t size = gpu->big_page.internal_size;
|
||||
static const size_t element_sizes[] = {1, 4, 8};
|
||||
const size_t iterations = 4;
|
||||
size_t i, j, k, s;
|
||||
uvm_mem_alloc_params_t mem_params = {0};
|
||||
|
||||
size = gpu->big_page.internal_size;
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
|
||||
else
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
|
||||
|
||||
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
|
||||
@@ -411,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
}
|
||||
}
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
|
||||
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
for (i = 0; i < iterations; ++i) {
|
||||
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
|
||||
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
|
||||
gpu_addresses[0],
|
||||
gpu_addresses[0],
|
||||
size,
|
||||
element_sizes[s],
|
||||
gpu_verif_addr,
|
||||
cpu_verif_addr,
|
||||
i),
|
||||
done);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Because gpu_verif_addr is in sysmem, when the Confidential
|
||||
// Computing feature is enabled, only the previous cases are valid.
|
||||
// TODO: Bug 3839176: the test is partially waived on Confidential
|
||||
// Computing because it assumes that GPU can access system memory
|
||||
// without using encryption.
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Using a page size equal to the allocation size ensures that the UVM
|
||||
// memories about to be allocated are physically contiguous. And since the
|
||||
// size is a valid GPU page size, the memories can be virtually mapped on
|
||||
@@ -422,23 +471,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
// Physical address in sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
|
||||
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
|
||||
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
|
||||
|
||||
// Physical address in vidmem
|
||||
mem_params.backing_gpu = gpu;
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
|
||||
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
|
||||
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by vidmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
|
||||
is_proxy_va_space = false;
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
|
||||
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
|
||||
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
|
||||
|
||||
// Virtual address (in UVM's internal address space) backed by sysmem
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
|
||||
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
|
||||
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);
|
||||
|
||||
for (i = 0; i < iterations; ++i) {
|
||||
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
|
||||
@@ -514,6 +557,11 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
|
||||
// Semaphore reduction needs 1 word (4 bytes).
|
||||
const size_t size = sizeof(NvU32);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
@@ -561,6 +609,11 @@ static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
|
||||
// Semaphore release needs 1 word (4 bytes).
|
||||
const size_t size = sizeof(NvU32);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
@@ -610,6 +663,11 @@ static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
|
||||
// The semaphore is 4 words long (16 bytes).
|
||||
const size_t size = 16;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
@@ -646,6 +704,517 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool mem_match(uvm_mem_t *mem1, uvm_mem_t *mem2, size_t size)
|
||||
{
|
||||
void *mem1_addr;
|
||||
void *mem2_addr;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem1));
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem2));
|
||||
UVM_ASSERT(mem1->size >= size);
|
||||
UVM_ASSERT(mem2->size >= size);
|
||||
|
||||
mem1_addr = uvm_mem_get_cpu_addr_kernel(mem1);
|
||||
mem2_addr = uvm_mem_get_cpu_addr_kernel(mem2);
|
||||
|
||||
return !memcmp(mem1_addr, mem2_addr, size);
|
||||
}
|
||||
|
||||
static NV_STATUS zero_vidmem(uvm_mem_t *mem)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_address_t gpu_address;
|
||||
uvm_gpu_t *gpu = mem->backing_gpu;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
|
||||
|
||||
gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
|
||||
gpu->parent->ce_hal->memset_1(&push, gpu_address, 0, mem->size);
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void write_range_cpu(uvm_mem_t *mem, NvU64 base_val)
|
||||
{
|
||||
NvU64 *mem_cpu_va;
|
||||
unsigned i;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem));
|
||||
UVM_ASSERT(IS_ALIGNED(mem->size, sizeof(*mem_cpu_va)));
|
||||
|
||||
mem_cpu_va = (NvU64 *) uvm_mem_get_cpu_addr_kernel(mem);
|
||||
|
||||
for (i = 0; i < (mem->size / sizeof(*mem_cpu_va)); i++)
|
||||
mem_cpu_va[i] = base_val++;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(mem);
|
||||
|
||||
*mem = NULL;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err:
|
||||
uvm_mem_free(*mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_sysmem_unprotected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(mem);
|
||||
|
||||
*mem = NULL;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
|
||||
memset(uvm_mem_get_cpu_addr_kernel(*mem), 0, (*mem)->size);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err:
|
||||
uvm_mem_free(*mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static void cpu_encrypt(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
size_t offset = 0;
|
||||
char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
|
||||
while (offset < size) {
|
||||
uvm_conf_computing_cpu_encrypt(channel, dst_cipher, src_plain, NULL, copy_size, auth_tag_buffer);
|
||||
|
||||
offset += copy_size;
|
||||
dst_cipher += copy_size;
|
||||
src_plain += copy_size;
|
||||
auth_tag_buffer += UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void cpu_acquire_encryption_ivs(uvm_channel_t *channel,
|
||||
size_t size,
|
||||
NvU32 copy_size,
|
||||
UvmCslIv *ivs)
|
||||
{
|
||||
size_t offset = 0;
|
||||
int i = 0;
|
||||
|
||||
for (; offset < size; offset += copy_size)
|
||||
uvm_conf_computing_acquire_encryption_iv(channel, &ivs[i++]);
|
||||
}
|
||||
|
||||
static void cpu_encrypt_rev(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size,
|
||||
UvmCslIv *encrypt_iv)
|
||||
{
|
||||
char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
int i;
|
||||
|
||||
// CPU encrypt order is the opposite of the GPU decrypt order
|
||||
for (i = (size / copy_size) - 1; i >= 0; i--) {
|
||||
uvm_conf_computing_cpu_encrypt(channel,
|
||||
dst_cipher + i * copy_size,
|
||||
src_plain + i * copy_size,
|
||||
encrypt_iv + i,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
size_t i;
|
||||
char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
|
||||
for (i = 0; i < size / copy_size; i++) {
|
||||
TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
{
|
||||
int i;
|
||||
char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
|
||||
char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
|
||||
char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
|
||||
|
||||
UVM_ASSERT((size / copy_size) <= INT_MAX);
|
||||
|
||||
    // CPU decrypt order is the opposite of the GPU encrypt order
    for (i = (size / copy_size) - 1; i >= 0; i--) {
        TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
                                                         dst_plain + i * copy_size,
                                                         src_cipher + i * copy_size,
                                                         decrypt_iv + i,
                                                         copy_size,
                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }

    return NV_OK;
}

// GPU address to use as source or destination in CE decrypt/encrypt operations.
// If the uvm_mem backing storage is contiguous in the [offset, offset + size)
// interval, the physical address gets priority over the virtual counterpart.
static uvm_gpu_address_t gpu_address(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU32 size)
{
    uvm_gpu_address_t gpu_virtual_address;

    if (uvm_mem_is_physically_contiguous(mem, offset, size))
        return uvm_mem_gpu_address_physical(mem, gpu, offset, size);

    gpu_virtual_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
    gpu_virtual_address.address += offset;

    return gpu_virtual_address;
}

// Automatically get the correct address for the authentication tag. The
// addressing mode of the tag should match that of the reference address
// (destination pointer for GPU encrypt, source pointer for GPU decrypt)
static uvm_gpu_address_t auth_tag_gpu_address(uvm_mem_t *auth_tag_mem,
                                              uvm_gpu_t *gpu,
                                              size_t offset,
                                              uvm_gpu_address_t reference)
{
    uvm_gpu_address_t auth_tag_gpu_address;

    if (!reference.is_virtual)
        return uvm_mem_gpu_address_physical(auth_tag_mem, gpu, offset, UVM_CONF_COMPUTING_AUTH_TAG_SIZE);

    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);
    auth_tag_gpu_address.address += offset;

    return auth_tag_gpu_address;
}

// Note: no membar is issued in any of the GPU transfers (encryptions)
static void gpu_encrypt(uvm_push_t *push,
                        uvm_mem_t *dst_mem,
                        uvm_mem_t *src_mem,
                        uvm_mem_t *auth_tag_mem,
                        UvmCslIv *decrypt_iv,
                        size_t size,
                        NvU32 copy_size)
{
    size_t i;
    size_t num_iterations = size / copy_size;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    for (i = 0; i < num_iterations; i++) {
        uvm_gpu_address_t dst_cipher = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t src_plain = gpu_address(src_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
                                                          gpu,
                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                                          dst_cipher);

        uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);

        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

        gpu->parent->ce_hal->encrypt(push, dst_cipher, src_plain, copy_size, auth_tag);
        decrypt_iv++;
    }
}

// Note: no membar is issued in any of the GPU transfers (decryptions)
static void gpu_decrypt(uvm_push_t *push,
                        uvm_mem_t *dst_mem,
                        uvm_mem_t *src_mem,
                        uvm_mem_t *auth_tag_mem,
                        size_t size,
                        NvU32 copy_size)
{
    size_t i;
    size_t num_iterations = size / copy_size;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    for (i = 0; i < num_iterations; i++) {
        uvm_gpu_address_t dst_plain = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t src_cipher = gpu_address(src_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
                                                          gpu,
                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                                          src_cipher);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);

        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

        gpu->parent->ce_hal->decrypt(push, dst_plain, src_cipher, copy_size, auth_tag);
    }
}

static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
                                           uvm_channel_type_t decrypt_channel_type,
                                           uvm_channel_type_t encrypt_channel_type,
                                           size_t size,
                                           NvU32 copy_size,
                                           bool decrypt_in_order,
                                           bool encrypt_in_order)
{
    uvm_push_t push;
    NvU64 init_value;
    NV_STATUS status = NV_OK;
    uvm_mem_t *src_plain = NULL;
    uvm_mem_t *src_cipher = NULL;
    uvm_mem_t *dst_cipher = NULL;
    uvm_mem_t *dst_plain_gpu = NULL;
    uvm_mem_t *dst_plain = NULL;
    uvm_mem_t *auth_tag_mem = NULL;
    size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    UvmCslIv *decrypt_iv = NULL;
    UvmCslIv *encrypt_iv = NULL;
    uvm_tracker_t tracker;
    size_t src_plain_size;

    TEST_CHECK_RET(copy_size <= size);
    TEST_CHECK_RET(IS_ALIGNED(size, copy_size));

    uvm_tracker_init(&tracker);

    decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
    if (!decrypt_iv) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    encrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
    if (!encrypt_iv) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_cipher, size), out);
    TEST_NV_CHECK_GOTO(alloc_vidmem_protected(gpu, &dst_plain_gpu, size), out);
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_cipher, size), out);
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_plain, size), out);
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &auth_tag_mem, auth_tag_buffer_size), out);

    // The plaintext CPU buffer size should fit the initialization value
    src_plain_size = UVM_ALIGN_UP(size, sizeof(init_value));
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_plain, src_plain_size), out);

    // Initialize the plaintext CPU buffer using a value that uniquely
    // identifies the given inputs
    TEST_CHECK_GOTO((((NvU64) size) < (1ULL << 63)), out);
    init_value = ((NvU64) decrypt_in_order << 63) | ((NvU64) size) | ((NvU64) copy_size);
    write_range_cpu(src_plain, init_value);

    TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
                                      decrypt_channel_type,
                                      &push,
                                      "CPU > GPU decrypt"),
                       out);

    // CPU (decrypted) > CPU (encrypted), using CPU, if in-order
    // acquire IVs if not in-order
    if (encrypt_in_order)
        cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
    else
        cpu_acquire_encryption_ivs(push.channel, size, copy_size, encrypt_iv);

    // CPU (encrypted) > GPU (decrypted), using GPU
    gpu_decrypt(&push, dst_plain_gpu, src_cipher, auth_tag_mem, size, copy_size);

    // Use acquired IVs to encrypt in reverse order
    if (!encrypt_in_order)
        cpu_encrypt_rev(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size, encrypt_iv);

    uvm_push_end(&push);
    TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);

    // GPU (decrypted) > CPU (encrypted), using GPU
    TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
                                              encrypt_channel_type,
                                              &tracker,
                                              &push,
                                              "GPU > CPU encrypt"),
                       out);

    gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);

    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

    TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);

    TEST_CHECK_GOTO(!mem_match(dst_cipher, src_plain, size), out);

    // CPU (encrypted) > CPU (decrypted), using CPU
    if (decrypt_in_order) {
        TEST_NV_CHECK_GOTO(cpu_decrypt_in_order(push.channel,
                                                dst_plain,
                                                dst_cipher,
                                                decrypt_iv,
                                                auth_tag_mem,
                                                size,
                                                copy_size),
                           out);
    }
    else {
        TEST_NV_CHECK_GOTO(cpu_decrypt_out_of_order(push.channel,
                                                    dst_plain,
                                                    dst_cipher,
                                                    decrypt_iv,
                                                    auth_tag_mem,
                                                    size,
                                                    copy_size),
                           out);
    }

    TEST_CHECK_GOTO(mem_match(src_plain, dst_plain, size), out);

out:
    uvm_mem_free(auth_tag_mem);
    uvm_mem_free(dst_plain);
    uvm_mem_free(dst_plain_gpu);
    uvm_mem_free(dst_cipher);
    uvm_mem_free(src_cipher);
    uvm_mem_free(src_plain);
    uvm_tracker_deinit(&tracker);
    uvm_kvfree(decrypt_iv);
    uvm_kvfree(encrypt_iv);

    return status;
}
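
The helpers above are strung together by test_cpu_to_gpu_roundtrip(). The sketch below is illustrative only (the helper name is hypothetical, not part of this change); it condenses the GPU-to-CPU half of the round trip for a single block: log the decrypt IV before pushing the CE encryption, wait for the push, then decrypt on the CPU with that IV.

// Illustrative sketch, not part of the original test file.
static NV_STATUS example_single_block_gpu_to_cpu(uvm_push_t *push,
                                                 uvm_gpu_address_t dst_cipher_gpu,
                                                 uvm_gpu_address_t src_plain_gpu,
                                                 uvm_gpu_address_t auth_tag_gpu,
                                                 void *dst_plain_cpu,
                                                 const void *dst_cipher_cpu,
                                                 const void *auth_tag_cpu,
                                                 NvU32 size)
{
    UvmCslIv decrypt_iv;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NV_STATUS status;

    // Record the IV the GPU will use for this encryption before pushing it
    uvm_conf_computing_log_gpu_encryption(push->channel, &decrypt_iv);
    gpu->parent->ce_hal->encrypt(push, dst_cipher_gpu, src_plain_gpu, size, auth_tag_gpu);

    // The ciphertext in sysmem is only valid once the push has completed
    status = uvm_push_end_and_wait(push);
    if (status != NV_OK)
        return status;

    return uvm_conf_computing_cpu_decrypt(push->channel, dst_plain_cpu, dst_cipher_cpu, &decrypt_iv, size, auth_tag_cpu);
}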

static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
                                            uvm_channel_type_t decrypt_channel_type,
                                            uvm_channel_type_t encrypt_channel_type)
{
    bool cpu_decrypt_in_order = true;
    bool cpu_encrypt_in_order = true;
    size_t size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_4K * 2, UVM_PAGE_SIZE_2M};
    size_t copy_size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_2M};
    unsigned i;

    struct {
        bool encrypt_in_order;
        bool decrypt_in_order;
    } orders[] = {{true, true}, {true, false}, {false, true}, {false, false}};

    struct {
        size_t size;
        NvU32 copy_size;
    } small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};

    // Only Confidential Computing uses CE encryption/decryption
    if (!uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    // Use a size, and copy size, that are not a multiple of common page sizes.
    for (i = 0; i < ARRAY_SIZE(small_sizes); ++i) {
        // Skip tests that need large pushbuffer on WLC. Secure work launch
        // needs to do at least one decrypt operation so tests that only need
        // one operation work ok. Tests using more operations might overflow
        // UVM_MAX_WLC_PUSH_SIZE.
        if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (small_sizes[i].size / small_sizes[i].copy_size > 1))
            continue;

        TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
                                                    decrypt_channel_type,
                                                    encrypt_channel_type,
                                                    small_sizes[i].size,
                                                    small_sizes[i].copy_size,
                                                    cpu_decrypt_in_order,
                                                    cpu_encrypt_in_order));
    }

    // Use sizes, and copy sizes, that are a multiple of common page sizes.
    // This is the most typical usage of encrypt/decrypt in the UVM driver.
    for (i = 0; i < ARRAY_SIZE(orders); ++i) {
        unsigned j;

        cpu_encrypt_in_order = orders[i].encrypt_in_order;
        cpu_decrypt_in_order = orders[i].decrypt_in_order;

        for (j = 0; j < ARRAY_SIZE(size); ++j) {
            unsigned k;

            for (k = 0; k < ARRAY_SIZE(copy_size); ++k) {
                if (copy_size[k] > size[j])
                    continue;

                // Skip tests that need large pushbuffer on WLC. Secure work
                // launch needs to do at least one decrypt operation so tests
                // that only need one operation work ok. Tests using more
                // operations might overflow UVM_MAX_WLC_PUSH_SIZE.
                if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (size[j] / copy_size[k] > 1))
                    continue;

                // There is no difference between in-order and out-of-order
                // decryption when encrypting once.
                if ((copy_size[k] == size[j]) && !cpu_decrypt_in_order)
                    continue;

                TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
                                                            decrypt_channel_type,
                                                            encrypt_channel_type,
                                                            size[j],
                                                            copy_size[k],
                                                            cpu_decrypt_in_order,
                                                            cpu_encrypt_in_order));
            }
        }
    }

    return NV_OK;
}

static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
{
    uvm_gpu_t *gpu;
@@ -660,6 +1229,8 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
        if (!skipTimestampTest)
            TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));

        TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_CPU_TO_GPU, UVM_CHANNEL_TYPE_GPU_TO_CPU));
        TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_WLC, UVM_CHANNEL_TYPE_WLC));
    }

    return NV_OK;
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -51,7 +51,7 @@
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)

// Maximum number of channels per pool.
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL 8
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL UVM_PUSH_MAX_CONCURRENT_PUSHES

// Semaphore payloads cannot advance too much between calls to
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
@@ -66,7 +66,7 @@

#define uvm_channel_pool_assert_locked(pool) ( \
    { \
        if (uvm_channel_pool_is_proxy(pool)) \
        if (uvm_channel_pool_uses_mutex(pool)) \
            uvm_assert_mutex_locked(&(pool)->mutex); \
        else \
            uvm_assert_spinlock_locked(&(pool)->spinlock); \
@@ -94,7 +94,27 @@ typedef enum

    // ^^^^^^
    // Channel types backed by a CE.
    UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
    // ----------------------------------
    // Channel types not backed by a CE.
    // vvvvvv

    // SEC2 channels
    UVM_CHANNEL_TYPE_SEC2 = UVM_CHANNEL_TYPE_CE_COUNT,

    // ----------------------------------
    // Channel type with fixed schedules

    // Work Launch Channel (WLC) is a specialized channel for launching work on
    // other channels when the Confidential Computing feature is enabled. It is
    // paired with LCIC (below)
    UVM_CHANNEL_TYPE_WLC,

    // Launch Confirmation Indicator Channel (LCIC) is a specialized channel
    // with fixed schedule. It gets triggered by executing WLC work, and makes
    // sure that WLC get/put pointers are up-to-date.
    UVM_CHANNEL_TYPE_LCIC,

    UVM_CHANNEL_TYPE_COUNT,
} uvm_channel_type_t;

typedef enum
@@ -112,7 +132,15 @@ typedef enum
    // There is a single proxy pool and channel per GPU.
    UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),

    UVM_CHANNEL_POOL_TYPE_COUNT = 2,
    // A pool of SEC2 channels owned by UVM. These channels are backed by a SEC2
    // engine.
    UVM_CHANNEL_POOL_TYPE_SEC2 = (1 << 2),

    UVM_CHANNEL_POOL_TYPE_WLC = (1 << 3),

    UVM_CHANNEL_POOL_TYPE_LCIC = (1 << 4),

    UVM_CHANNEL_POOL_TYPE_COUNT = 5,

    // A mask used to select pools of any type.
    UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
@@ -136,16 +164,24 @@ struct uvm_gpfifo_entry_struct
    // this entry.
    NvU64 tracking_semaphore_value;

    union {
        struct {
            // Offset of the pushbuffer in the pushbuffer allocation used by
            // this entry.
            NvU32 pushbuffer_offset;

            // Size of the pushbuffer used for this entry.
            NvU32 pushbuffer_size;
        };

        // Value of control entry
        // Exact value of GPFIFO entry copied directly to GPFIFO[PUT] location.
        NvU64 control_value;
    };

    // The following fields are only valid when type is
    // UVM_GPFIFO_ENTRY_TYPE_NORMAL.

    // Offset of the pushbuffer in the pushbuffer allocation used by
    // this entry.
    NvU32 pushbuffer_offset;

    // Size of the pushbuffer used for this entry.
    NvU32 pushbuffer_size;

    // List node used by the pushbuffer tracking
    struct list_head pending_list_node;

@@ -160,6 +196,19 @@ typedef struct
    // Owning channel manager
    uvm_channel_manager_t *manager;

    // On Volta+ GPUs, all channels in a pool are members of the same TSG, i.e.,
    // num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
    // does not support TSG for CE engines, a HW TSG is not created, but a TSG
    // object is required to allocate channels.
    // When Confidential Computing mode is enabled, the WLC and LCIC channel
    // types require one TSG for each WLC/LCIC pair of channels. In this case,
    // we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
    // channel pair, and num_tsgs equals the number of channel pairs.
    uvmGpuTsgHandle *tsg_handles;

    // Number of TSG handles owned by this pool.
    NvU32 num_tsgs;

    // Channels in this pool
    uvm_channel_t *channels;

@@ -176,22 +225,24 @@ typedef struct
    // Lock protecting the state of channels in the pool.
    //
    // There are two pool lock types available: spinlock and mutex. The mutex
    // variant is required when the thread holding the pool lock must
    // sleep (ex: acquire another mutex) deeper in the call stack, either in UVM
    // or RM. For example, work submission to proxy channels in SR-IOV heavy
    // entails calling an RM API that acquires a mutex, so the proxy channel
    // pool must use the mutex variant.
    //
    // Unless the mutex is required, the spinlock is preferred. This is because,
    // other than for proxy channels, work submission takes little time and does
    // not involve any RM calls, so UVM can avoid any invocation that may result
    // on a sleep. All non-proxy channel pools use the spinlock variant, even in
    // SR-IOV heavy.
    // variant is required when the thread holding the pool lock must sleep
    // (ex: acquire another mutex) deeper in the call stack, either in UVM or
    // RM.
    union {
        uvm_spinlock_t spinlock;
        uvm_mutex_t mutex;
    };

    // Secure operations require that uvm_push_begin order matches
    // uvm_push_end order, because the engine's state is used in its internal
    // operation and each push may modify this state. push_locks is protected by
    // the channel pool lock.
    DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);

    // Counting semaphore for available and unlocked channels, it must be
    // acquired before submitting work to a channel when the Confidential
    // Computing feature is enabled.
    uvm_semaphore_t push_sem;
} uvm_channel_pool_t;
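
The lock union above is taken either as a spinlock or as a mutex, depending on how the pool was created. A minimal sketch of the selection logic, mirroring uvm_channel_pool_assert_locked() and assuming the usual uvm_mutex_lock()/uvm_spin_lock() helpers from uvm_lock.h (the function name is hypothetical, not part of this header):

// Illustrative sketch only.
static void example_channel_pool_lock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_uses_mutex(pool))
        uvm_mutex_lock(&pool->mutex);
    else
        uvm_spin_lock(&pool->spinlock);
}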

struct uvm_channel_struct
@@ -242,6 +293,74 @@ struct uvm_channel_struct
    // uvm_channel_end_push().
    uvm_gpu_tracking_semaphore_t tracking_sem;

    struct
    {
        // Secure operations require that uvm_push_begin order matches
        // uvm_push_end order, because the engine's state is used in
        // its internal operation and each push may modify this state.
        uvm_mutex_t push_lock;

        // When the Confidential Computing feature is enabled, every channel has
        // cryptographic state in HW, which is mirrored here for CPU-side
        // operations.
        UvmCslContext ctx;
        bool is_ctx_initialized;

        // CPU-side CSL crypto operations which operate on the same CSL state
        // are not thread-safe, so they must be wrapped in locks at the UVM
        // level. Encryption, decryption and logging operations must be
        // protected with the ctx_lock.
        uvm_mutex_t ctx_lock;
    } csl;

    struct
    {
        // The value of GPU side PUT index.
        // Indirect work submission introduces delay between updating the CPU
        // put when ending a push, and updating the GPU visible value via
        // indirect work launch. It is used to order multiple pending indirect
        // work launches to match the order of push end-s that triggered them.
        volatile NvU32 gpu_put;

        // Static pushbuffer for channels with static schedule (WLC/LCIC)
        uvm_rm_mem_t *static_pb_protected_vidmem;

        // Static pushbuffer staging buffer for WLC
        uvm_rm_mem_t *static_pb_unprotected_sysmem;
        void *static_pb_unprotected_sysmem_cpu;
        void *static_pb_unprotected_sysmem_auth_tag_cpu;

        // The above static locations are required by the WLC (and LCIC)
        // schedule. Protected sysmem location completes WLC's independence
        // from the pushbuffer allocator.
        void *static_pb_protected_sysmem;

        // Static tracking semaphore notifier values
        // Because of LCIC's fixed schedule, the secure semaphore release
        // mechanism uses two additional static locations for incrementing the
        // notifier values. See:
        // . channel_semaphore_secure_release()
        // . setup_lcic_schedule()
        // . internal_channel_submit_work_wlc()
        uvm_rm_mem_t *static_notifier_unprotected_sysmem;
        NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
        NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
        uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
        uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;

        // Explicit location for push launch tag used by WLC.
        // Encryption auth tags have to be located in unprotected sysmem.
        void *launch_auth_tag_cpu;
        NvU64 launch_auth_tag_gpu_va;

        // Used to decrypt the push back to protected sysmem.
        // This happens when profilers register callbacks for migration data.
        uvm_push_crypto_bundle_t *push_crypto_bundles;

        // Accompanying authentication tags for the crypto bundles
        uvm_rm_mem_t *push_crypto_bundle_auth_tags;
    } conf_computing;

    // RM channel information
    union
    {
@@ -337,9 +456,50 @@ struct uvm_channel_manager_struct
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);

static bool uvm_pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
    return (is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
}

static bool uvm_channel_pool_is_sec2(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_SEC2);
}

static bool uvm_channel_pool_is_wlc(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_WLC);
}

static bool uvm_channel_pool_is_lcic(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_LCIC);
}

static bool uvm_channel_is_sec2(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_sec2(channel->pool);
}

static bool uvm_channel_is_wlc(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_wlc(channel->pool);
}

static bool uvm_channel_is_lcic(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_lcic(channel->pool);
}

static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}
@@ -351,9 +511,7 @@ static bool uvm_channel_is_proxy(uvm_channel_t *channel)

static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
    return !uvm_channel_pool_is_sec2(pool);
}

static bool uvm_channel_is_ce(uvm_channel_t *channel)
@@ -361,6 +519,8 @@ static bool uvm_channel_is_ce(uvm_channel_t *channel)
    return uvm_channel_pool_is_ce(channel->pool);
}

bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool);

// Proxy channels are used to push page tree related methods, so their channel
// type is UVM_CHANNEL_TYPE_MEMOPS.
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
@@ -415,6 +575,13 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);

// Check if WLC/LCIC mechanism is ready/setup
// Should only return false during initialization
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
{
    return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
           (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
}
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
//
@@ -486,6 +653,11 @@ static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
    return channel->pool->manager->gpu;
}

static uvm_pushbuffer_t *uvm_channel_get_pushbuffer(uvm_channel_t *channel)
{
    return channel->pool->manager->pushbuffer;
}

// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{
@@ -60,6 +60,11 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
    gpu = uvm_va_space_find_first_gpu(va_space);
    TEST_CHECK_RET(gpu != NULL);

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that GPU can access system memory without using encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

@@ -69,7 +74,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
    TEST_CHECK_GOTO(status == NV_OK, done);

    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;

    // Semaphore release as part of uvm_push_end() will do the membar
    uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
@@ -104,7 +109,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
                                 value + 1);
        TEST_CHECK_GOTO(status == NV_OK, done);

        gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
        gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;
        gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
        gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);

@@ -200,6 +205,9 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;
        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            for (i = 0; i < 512; ++i) {
@@ -341,8 +349,8 @@ static void snapshot_counter(uvm_push_t *push,
        return;

    is_proxy_channel = uvm_channel_is_proxy(push->channel);
    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
    snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel) + index * 2 * sizeof(NvU32);
    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;
    snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel).address + index * 2 * sizeof(NvU32);

    // Copy the last and first counter to a snapshot for later verification.

@@ -367,7 +375,7 @@ static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value
    bool is_proxy_channel;

    is_proxy_channel = uvm_channel_is_proxy(push->channel);
    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;

    gpu->parent->ce_hal->memset_v_4(push, counter_gpu_va, value, count * sizeof(NvU32));
}
@@ -427,7 +435,7 @@ static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 val
    UVM_ASSERT(rm_mem->size % 4 == 0);

    gpu = uvm_push_get_gpu(push);
    gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel));
    gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel)).address;

    gpu->parent->ce_hal->memset_v_4(push, gpu_va, value, rm_mem->size);
}
@@ -672,6 +680,72 @@ done:
    return status;
}

// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
// This test verifies that concurrent pushes using the same channel pool
// select different channels, when the Confidential Computing feature is
// enabled.
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_channel_pool_t *pool;
    uvm_push_t *pushes;
    uvm_gpu_t *gpu;
    NvU32 i;
    NvU32 num_pushes;

    gpu = uvm_va_space_find_first_gpu(va_space);

    if (!uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    uvm_thread_context_lock_disable_tracking();

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_type_t channel_type;

        for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
            pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
            TEST_CHECK_RET(pool != NULL);

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;

            if (pool->num_channels < 2)
                continue;

            num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);

            pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
            TEST_CHECK_RET(pushes != NULL);

            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
                status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
                TEST_NV_CHECK_GOTO(status, error);
                if (i > 0)
                    TEST_CHECK_GOTO(pushes[i-1].channel != push->channel, error);
            }
            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
                status = uvm_push_end_and_wait(push);
                TEST_NV_CHECK_GOTO(status, error);
            }

            uvm_kvfree(pushes);
        }
    }

    uvm_thread_context_lock_enable_tracking();

    return status;
error:
    uvm_thread_context_lock_enable_tracking();
    uvm_kvfree(pushes);

    return status;
}

NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
@@ -683,6 +757,14 @@ NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;

        // Skip WLC channels as those can't accept ctrl gpfifos
        // after their schedule is set up
        if (uvm_channel_pool_is_wlc(pool))
            continue;
        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;

@@ -714,6 +796,14 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;

        // Skip WLC channels as those can't accept ctrl gpfifos
        // after their schedule is set up
        if (uvm_channel_pool_is_wlc(pool))
            continue;
        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            uvm_push_t push;
@@ -757,6 +847,11 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)

    gpu = uvm_va_space_find_first_gpu(va_space);

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that GPU can access system memory without using encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;

@@ -850,6 +945,9 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;
        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            uvm_push_t push;
@@ -897,6 +995,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
    if (status != NV_OK)
        goto done;

    status = test_conf_computing_channel_selection(va_space);
    if (status != NV_OK)
        goto done;

    // The following tests have side effects, they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
@@ -937,12 +1039,18 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
    uvm_mutex_lock(&g_uvm_global.global_lock);
    uvm_va_space_down_read_rm(va_space);

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that GPU can access system memory without using encryption.
    if (uvm_conf_computing_mode_enabled(uvm_va_space_find_first_gpu(va_space)))
        goto done;

    status = stress_test_all_gpus_in_va(va_space,
                                        params->num_streams,
                                        params->iterations,
                                        params->seed,
                                        params->verbose);

done:
    uvm_va_space_up_read_rm(va_space);
    uvm_mutex_unlock(&g_uvm_global.global_lock);

@@ -211,6 +211,11 @@ static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
{
    return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
}
#define UVM_SIZE_1KB (1024ULL)
#define UVM_SIZE_1MB (1024 * UVM_SIZE_1KB)
#define UVM_SIZE_1GB (1024 * UVM_SIZE_1MB)
#define UVM_SIZE_1TB (1024 * UVM_SIZE_1GB)
#define UVM_SIZE_1PB (1024 * UVM_SIZE_1TB)

#define UVM_ALIGN_DOWN(x, a) ({ \
        typeof(x) _a = a; \
@@ -352,6 +357,7 @@ typedef enum
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_MM,
    UVM_FD_COUNT
} uvm_fd_type_t;

@@ -388,6 +394,10 @@ bool uvm_file_is_nvidia_uvm(struct file *filp);
// NULL returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type
// matches, otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
@@ -400,4 +410,7 @@ static inline void uvm_touch_page(struct page *page)
    kunmap(page);
}

// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);

#endif /* _UVM_COMMON_H */
501
kernel-open/nvidia-uvm/uvm_conf_computing.c
Normal file
@@ -0,0 +1,501 @@
/*******************************************************************************
    Copyright (c) 2021-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_conf_computing.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_mem.h"
#include "uvm_processors.h"
#include "uvm_tracker.h"
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"


static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
{
    return parent->rm_info.gpuConfComputeCaps.mode;
}

bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent)
{
    return uvm_conf_computing_get_mode(parent) != UVM_GPU_CONF_COMPUTE_MODE_NONE;
}

bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu)
{
    return uvm_conf_computing_mode_enabled_parent(gpu->parent);
}

bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
{
    return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
    uvm_gpu_t *first_gpu;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    // The Confidential Computing state of the GPU should match that of the
    // system.
    UVM_ASSERT(uvm_conf_computing_mode_enabled_parent(parent) == g_uvm_global.conf_computing_enabled);

    // TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
    // find first child GPU and get its parent.
    first_gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
    if (first_gpu == NULL)
        return;

    // All GPUs derive Confidential Computing status from their parent. By
    // current policy all parent GPUs have identical Confidential Computing
    // status.
    UVM_ASSERT(uvm_conf_computing_get_mode(parent) == uvm_conf_computing_get_mode(first_gpu->parent));
}

static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                      uvm_conf_computing_dma_buffer_t *dma_buffer)
{
    uvm_assert_mutex_locked(&dma_buffer_pool->lock);

    list_del(&dma_buffer->node);
    uvm_tracker_wait_deinit(&dma_buffer->tracker);

    uvm_mem_free(dma_buffer->alloc);
    uvm_mem_free(dma_buffer->auth_tag);
    uvm_kvfree(dma_buffer);
}

static uvm_gpu_t *dma_buffer_pool_to_gpu(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    return container_of(dma_buffer_pool, uvm_gpu_t, conf_computing.dma_buffer_pool);
}

// Allocate and map a new DMA stage buffer to CPU and GPU (VA)
static NV_STATUS dma_buffer_create(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                   uvm_conf_computing_dma_buffer_t **dma_buffer_out)
{
    uvm_gpu_t *dma_owner;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_mem_t *alloc = NULL;
    NV_STATUS status = NV_OK;
    size_t auth_tags_size = (UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;

    dma_buffer = uvm_kvmalloc_zero(sizeof(*dma_buffer));
    if (!dma_buffer)
        return NV_ERR_NO_MEMORY;

    dma_owner = dma_buffer_pool_to_gpu(dma_buffer_pool);
    uvm_tracker_init(&dma_buffer->tracker);
    INIT_LIST_HEAD(&dma_buffer->node);

    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE, dma_owner, NULL, &alloc);
    if (status != NV_OK)
        goto err;

    dma_buffer->alloc = alloc;

    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
    if (status != NV_OK)
        goto err;

    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(auth_tags_size, dma_owner, NULL, &alloc);
    if (status != NV_OK)
        goto err;

    dma_buffer->auth_tag = alloc;

    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
    if (status != NV_OK)
        goto err;

    *dma_buffer_out = dma_buffer;

    return status;

err:
    dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
    return status;
}

void uvm_conf_computing_dma_buffer_pool_sync(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;

    if (dma_buffer_pool->num_dma_buffers == 0)
        return;

    uvm_mutex_lock(&dma_buffer_pool->lock);
    list_for_each_entry(dma_buffer, &dma_buffer_pool->free_dma_buffers, node)
        uvm_tracker_wait(&dma_buffer->tracker);
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

static void conf_computing_dma_buffer_pool_deinit(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_conf_computing_dma_buffer_t *next_buff;

    if (dma_buffer_pool->num_dma_buffers == 0)
        return;

    // Because the pool is torn down at the same time the GPU is unregistered,
    // the lock is required only to quiet assertions, not for functional
    // reasons (see dma_buffer_destroy_locked()).
    uvm_mutex_lock(&dma_buffer_pool->lock);

    list_for_each_entry_safe(dma_buffer, next_buff, &dma_buffer_pool->free_dma_buffers, node) {
        dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
        dma_buffer_pool->num_dma_buffers--;
    }

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
    UVM_ASSERT(list_empty(&dma_buffer_pool->free_dma_buffers));
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}

static void dma_buffer_pool_add(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                uvm_conf_computing_dma_buffer_t *dma_buffer)
{
    uvm_assert_mutex_locked(&dma_buffer_pool->lock);
    list_add_tail(&dma_buffer->node, &dma_buffer_pool->free_dma_buffers);
}

static NV_STATUS conf_computing_dma_buffer_pool_init(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    size_t i;
    uvm_gpu_t *gpu;
    size_t num_dma_buffers = 32;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);

    gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);

    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));

    INIT_LIST_HEAD(&dma_buffer_pool->free_dma_buffers);
    uvm_mutex_init(&dma_buffer_pool->lock, UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
    dma_buffer_pool->num_dma_buffers = num_dma_buffers;

    uvm_mutex_lock(&dma_buffer_pool->lock);
    for (i = 0; i < num_dma_buffers; i++) {
        uvm_conf_computing_dma_buffer_t *dma_buffer;

        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
        if (status != NV_OK)
            break;

        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    }
    uvm_mutex_unlock(&dma_buffer_pool->lock);

    if (i < num_dma_buffers)
        conf_computing_dma_buffer_pool_deinit(dma_buffer_pool);

    return status;
}

static NV_STATUS dma_buffer_pool_expand_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
    size_t i;
    uvm_gpu_t *gpu;
    size_t nb_to_alloc;
    NV_STATUS status = NV_OK;
    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);
    nb_to_alloc = dma_buffer_pool->num_dma_buffers;
    for (i = 0; i < nb_to_alloc; ++i) {
        uvm_conf_computing_dma_buffer_t *dma_buffer;

        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
        if (status != NV_OK)
            break;

        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    }

    dma_buffer_pool->num_dma_buffers += i;

    if (i == 0)
        return status;

    return NV_OK;
}

NV_STATUS uvm_conf_computing_dma_buffer_alloc(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                              uvm_conf_computing_dma_buffer_t **dma_buffer_out,
                                              uvm_tracker_t *out_tracker)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer = NULL;
    NV_STATUS status;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    // TODO: Bug 3385623: Heuristically expand DMA memory pool
    uvm_mutex_lock(&dma_buffer_pool->lock);
    if (list_empty(&dma_buffer_pool->free_dma_buffers)) {
        status = dma_buffer_pool_expand_locked(dma_buffer_pool);

        if (status != NV_OK) {
            uvm_mutex_unlock(&dma_buffer_pool->lock);
            return status;
        }
    }

    // We're guaranteed that at least one DMA stage buffer is available at this
    // point.
    dma_buffer = list_first_entry(&dma_buffer_pool->free_dma_buffers, uvm_conf_computing_dma_buffer_t, node);
    list_del_init(&dma_buffer->node);
    uvm_mutex_unlock(&dma_buffer_pool->lock);

    status = uvm_tracker_wait_for_other_gpus(&dma_buffer->tracker, dma_buffer->alloc->dma_owner);
    if (status != NV_OK)
        goto error;

    if (out_tracker)
        status = uvm_tracker_add_tracker_safe(out_tracker, &dma_buffer->tracker);
    else
        status = uvm_tracker_wait(&dma_buffer->tracker);

    if (status != NV_OK)
        goto error;

    uvm_page_mask_zero(&dma_buffer->encrypted_page_mask);
    *dma_buffer_out = dma_buffer;

    return status;

error:
    uvm_tracker_deinit(&dma_buffer->tracker);
    uvm_conf_computing_dma_buffer_free(dma_buffer_pool, dma_buffer, NULL);
    return status;
}

void uvm_conf_computing_dma_buffer_free(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
                                        uvm_conf_computing_dma_buffer_t *dma_buffer,
                                        uvm_tracker_t *tracker)
{

    NV_STATUS status;

    if (!dma_buffer)
        return;

    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);

    uvm_tracker_remove_completed(&dma_buffer->tracker);
    if (tracker) {
        uvm_tracker_remove_completed(tracker);
        status = uvm_tracker_add_tracker_safe(&dma_buffer->tracker, tracker);
        if (status != NV_OK)
            UVM_ASSERT(status == uvm_global_get_status());
    }

    uvm_mutex_lock(&dma_buffer_pool->lock);
    dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
    uvm_mutex_unlock(&dma_buffer_pool->lock);
}
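
A minimal usage sketch of the pool API defined above (the wrapper name is hypothetical, the staging work and tracker handling are elided):

// Illustrative sketch only, not part of the original file.
static NV_STATUS example_with_dma_buffer(uvm_gpu_t *gpu)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    NV_STATUS status;

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    // ... stage data through dma_buffer->alloc and dma_buffer->auth_tag ...

    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);

    return NV_OK;
}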

static void dummy_iv_mem_deinit(uvm_gpu_t *gpu)
{
    uvm_mem_free(gpu->conf_computing.iv_mem);
}

static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!uvm_conf_computing_mode_is_hcc(gpu))
        return NV_OK;

    status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
    if (status != NV_OK)
        return status;

    status = uvm_mem_map_gpu_kernel(gpu->conf_computing.iv_mem, gpu);
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    dummy_iv_mem_deinit(gpu);
    return status;
}

NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = conf_computing_dma_buffer_pool_init(&gpu->conf_computing.dma_buffer_pool);
    if (status != NV_OK)
        return status;

    status = dummy_iv_mem_init(gpu);
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    uvm_conf_computing_gpu_deinit(gpu);
    return status;
}

void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
{
    dummy_iv_mem_deinit(gpu);
    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}

void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
{
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
    // NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
    // nvUvmInterfaceCslRotateIv.
    UVM_ASSERT(status == NV_OK);
}

void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
{
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
    // NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
    // nvUvmInterfaceCslRotateIv.
    UVM_ASSERT(status == NV_OK);
}

void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
                                    void *dst_cipher,
                                    const void *src_plain,
                                    UvmCslIv *encrypt_iv,
                                    size_t size,
                                    void *auth_tag_buffer)
{
    NV_STATUS status;

    UVM_ASSERT(size);

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
                                      size,
                                      (NvU8 const *) src_plain,
                                      encrypt_iv,
                                      (NvU8 *) dst_cipher,
                                      (NvU8 *) auth_tag_buffer);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // nvUvmInterfaceCslEncrypt fails when a 64-bit encryption counter
    // overflows. This is not supposed to happen on CC.
    UVM_ASSERT(status == NV_OK);
}
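
A minimal sketch pairing the two CPU-side entry points above: reserve an IV up front with uvm_conf_computing_acquire_encryption_iv(), then encrypt with it. This mirrors the out-of-order CPU encryption pattern exercised in the channel tests; the wrapper name is hypothetical.

// Illustrative sketch only, not part of the original file.
static void example_cpu_encrypt_with_reserved_iv(uvm_channel_t *channel,
                                                 void *dst_cipher,
                                                 const void *src_plain,
                                                 void *auth_tag_buffer,
                                                 size_t size)
{
    UvmCslIv iv;

    uvm_conf_computing_acquire_encryption_iv(channel, &iv);
    uvm_conf_computing_cpu_encrypt(channel, dst_cipher, src_plain, &iv, size, auth_tag_buffer);
}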

NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         size_t size,
                                         const void *auth_tag_buffer)
{
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
                                      size,
                                      (const NvU8 *) src_cipher,
                                      src_iv,
                                      (NvU8 *) dst_plain,
                                      NULL,
                                      0,
                                      (const NvU8 *) auth_tag_buffer);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    return status;
}

NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           void *dst_plain,
                                           const void *src_cipher,
                                           const void *auth_tag_buffer,
                                           NvU8 valid)
{
    NV_STATUS status;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
    // relying on the GPU replayable service lock (ISR lock), since fault
    // decryption is invoked as part of fault servicing.
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));

    status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
                                      parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
                                      (const NvU8 *) src_cipher,
                                      NULL,
                                      (NvU8 *) dst_plain,
                                      &valid,
                                      sizeof(valid),
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK)
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);

    return status;
}

void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
{
    NV_STATUS status;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));

    status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
                                          UVM_CSL_OPERATION_DECRYPT,
                                          increment,
                                          NULL);

    UVM_ASSERT(status == NV_OK);
}
202
kernel-open/nvidia-uvm/uvm_conf_computing.h
Normal file
@@ -0,0 +1,202 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __UVM_CONF_COMPUTING_H__
|
||||
#define __UVM_CONF_COMPUTING_H__
|
||||
|
||||
#include "nv_uvm_types.h"
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_tracker.h"
|
||||
#include "uvm_va_block_types.h"
|
||||
|
||||
#include "linux/list.h"
|
||||
|
||||
#define UVM_CONF_COMPUTING_AUTH_TAG_SIZE (UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES)
|
||||
|
||||
// An authentication tag pointer is required by HW to be 16-byte aligned.
|
||||
#define UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT 16
|
||||
|
||||
// An IV pointer is required by HW to be 16-byte aligned.
|
||||
//
|
||||
// Use sizeof(UvmCslIv) to refer to the IV size.
|
||||
#define UVM_CONF_COMPUTING_IV_ALIGNMENT 16
|
||||
|
||||
// SEC2 decrypt operation buffers are required to be 16-byte aligned.
|
||||
#define UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT 16
|
||||
|
||||
// CE encrypt/decrypt can be unaligned if the entire buffer lies in a single
|
||||
// 32B segment. Otherwise, it needs to be 32B aligned.
|
||||
#define UVM_CONF_COMPUTING_BUF_ALIGNMENT 32
|
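// A minimal illustrative check of the CE alignment rule above (sketch only,
// not driver code): example_ce_crypt_buf_is_aligned() and the use of the
// kernel's IS_ALIGNED() macro are assumptions introduced here for clarity,
// and "aligned" is read as applying to the buffer address.
static bool example_ce_crypt_buf_is_aligned(NvU64 addr, NvU64 size)
{
    NvU64 align = UVM_CONF_COMPUTING_BUF_ALIGNMENT;

    // A buffer fully contained in a single 32B segment has no alignment
    // requirement.
    if (size > 0 && (addr / align == (addr + size - 1) / align))
        return true;

    // Otherwise the buffer address is assumed to need 32B alignment.
    return IS_ALIGNED(addr, align);
}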
||||
|
||||
#define UVM_CONF_COMPUTING_DMA_BUFFER_SIZE UVM_VA_BLOCK_SIZE
|
||||
|
||||
// SEC2 supports at most a stream of 64 entries in the method stream for
|
||||
// signing. Each entry is made of the method address and method data, therefore
|
||||
// the maximum buffer size is: UVM_METHOD_SIZE * 2 * 64 = 512.
|
||||
// UVM, however, won't use that many entries. In the worst case scenario,
|
||||
// we push either a semaphore_release or a decrypt. A SEC2 semaphore_release uses 6 1U
|
||||
// entries, whereas a SEC2 decrypt uses 10 1U entries. For 10 entries,
|
||||
// UVM_METHOD_SIZE * 2 * 10 = 80.
|
||||
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
|
||||
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
|
||||
|
||||
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
|
||||
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
|
||||
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// List of free DMA buffers (uvm_conf_computing_dma_buffer_t).
|
||||
// A free DMA buffer can be grabbed anytime, though the tracker
|
||||
// inside it may still have pending work.
|
||||
struct list_head free_dma_buffers;
|
||||
|
||||
// Used to grow the pool when full.
|
||||
size_t num_dma_buffers;
|
||||
|
||||
// Lock protecting the dma_buffer_pool
|
||||
uvm_mutex_t lock;
|
||||
} uvm_conf_computing_dma_buffer_pool_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Backing DMA allocation
|
||||
uvm_mem_t *alloc;
|
||||
|
||||
// Used internally by the pool management code to track the state of
|
||||
// a free buffer.
|
||||
uvm_tracker_t tracker;
|
||||
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, SEC2
|
||||
// writes the authentication tag here. Later when the buffer is decrypted
|
||||
// on the CPU the authentication tag is used again (read) for CSL to verify
|
||||
// the authenticity. The allocation is big enough for one authentication
|
||||
// tag per PAGE_SIZE page in the alloc buffer.
|
||||
uvm_mem_t *auth_tag;
|
||||
|
||||
// CSL supports out-of-order decryption; the decrypt IV is used similarly
|
||||
// to the authentication tag. The allocation is big enough for one IV per
|
||||
// PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
|
||||
// IV and authentication tag must match.
|
||||
UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
|
||||
|
||||
// Bitmap of the encrypted pages in the backing allocation
|
||||
uvm_page_mask_t encrypted_page_mask;
|
||||
|
||||
// See uvm_conf_computing_dma_pool lists
|
||||
struct list_head node;
|
||||
} uvm_conf_computing_dma_buffer_t;
|
||||
|
||||
// Retrieve a DMA buffer from the given DMA allocation pool.
|
||||
// NV_OK Stage buffer successfully retrieved
|
||||
// NV_ERR_NO_MEMORY No free DMA buffers are available for grab, and
|
||||
// expanding the memory pool to get new ones failed.
|
||||
//
|
||||
// out_dma_buffer is only valid if NV_OK is returned. The caller is responsible
|
||||
// for calling uvm_conf_computing_dma_buffer_free once the operations on this
|
||||
// buffer are done.
|
||||
// When out_tracker is passed to the function, the buffer's dependencies are
|
||||
// added to the tracker. The caller is guaranteed that all pending tracker
|
||||
// entries come from the same GPU as the pool's owner. Before being able to use
|
||||
// the DMA buffer, the caller is responsible for either acquiring or waiting
|
||||
// on out_tracker. If out_tracker is NULL, the wait happens in the allocation
|
||||
// itself.
|
||||
// Upon success the encrypted_page_mask is cleared as part of the allocation.
|
||||
NV_STATUS uvm_conf_computing_dma_buffer_alloc(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
|
||||
uvm_conf_computing_dma_buffer_t **out_dma_buffer,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Free a DMA buffer to the DMA allocation pool. All DMA buffers must be freed
|
||||
// prior to GPU deinit.
|
||||
//
|
||||
// The tracker is optional and a NULL tracker indicates that no new operation
|
||||
// has been pushed for the buffer. A non-NULL tracker indicates any additional
|
||||
// pending operations on the buffer pushed by the caller that need to be
|
||||
// synchronized before freeing or re-using the buffer.
|
||||
void uvm_conf_computing_dma_buffer_free(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer,
|
||||
uvm_tracker_t *tracker);
|
||||
|
||||
// Synchronize trackers in all entries in the GPU's DMA pool
|
||||
void uvm_conf_computing_dma_buffer_pool_sync(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool);
|
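// Usage sketch (illustrative only): a typical grab/use/return cycle with the
// pool API above. example_dma_buffer_roundtrip() is a hypothetical caller,
// and the uvm_tracker helpers (UVM_TRACKER_INIT, uvm_tracker_wait,
// uvm_tracker_deinit) are assumed from uvm_tracker.h.
static NV_STATUS example_dma_buffer_roundtrip(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();

    // Grab a buffer; its pending dependencies are added to the tracker.
    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, &tracker);
    if (status != NV_OK)
        return status;

    // Prior users of the buffer may still have work in flight: wait for it.
    status = uvm_tracker_wait(&tracker);
    if (status == NV_OK) {
        // ... stage data through dma_buffer->alloc and dma_buffer->auth_tag ...
    }

    // No new GPU work was pushed in this sketch, so a NULL tracker is passed.
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    uvm_tracker_deinit(&tracker);
    return status;
}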
||||
|
||||
|
||||
// Initialization and deinitialization of Confidential Computing data structures
|
||||
// for the given GPU.
|
||||
NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
|
||||
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);
|
||||
|
||||
// Logs encryption information from the GPU and returns the IV.
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
|
||||
|
||||
// Acquires next CPU encryption IV and returns it.
|
||||
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
|
||||
|
||||
// CPU side encryption helper with explicit IV, which is obtained from
|
||||
// uvm_conf_computing_acquire_encryption_iv. Without an explicit IV
|
||||
// the function uses the next IV in order. Encrypts data in src_plain and
|
||||
// writes the cipher text in dst_cipher. src_plain and dst_cipher can't overlap.
|
||||
// The IV is invalidated and can't be used again after this operation.
|
||||
void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
|
||||
void *dst_cipher,
|
||||
const void *src_plain,
|
||||
UvmCslIv *encrypt_iv,
|
||||
size_t size,
|
||||
void *auth_tag_buffer);
|
||||
|
||||
// CPU side decryption helper. Decrypts data from src_cipher and writes the
|
||||
// plain text in dst_plain. src_cipher and dst_plain can't overlap. IV obtained
|
||||
// from uvm_conf_computing_log_gpu_encryption() needs to be passed to src_iv.
|
||||
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const UvmCslIv *src_iv,
|
||||
size_t size,
|
||||
const void *auth_tag_buffer);
|
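// Usage sketch (illustrative only): encrypting a CPU buffer with an explicit
// IV using the helpers above. example_cpu_encrypt() and its staging/payload/
// auth_tag parameters are hypothetical names; the caller is assumed to
// provide suitably sized and aligned buffers.
static void example_cpu_encrypt(uvm_channel_t *channel,
                                void *staging,
                                const void *payload,
                                size_t size,
                                void *auth_tag)
{
    UvmCslIv iv;

    // Reserve the next IV up front so it can be referenced before the
    // encryption itself happens.
    uvm_conf_computing_acquire_encryption_iv(channel, &iv);

    // Encrypt payload into the staging buffer; the IV is consumed by this
    // call and must not be reused.
    uvm_conf_computing_cpu_encrypt(channel, staging, payload, &iv, size, auth_tag);
}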
||||
|
||||
// CPU decryption of a single replayable fault, encrypted by GSP-RM.
|
||||
//
|
||||
// Replayable fault decryption depends not only on the encrypted fault contents
|
||||
// and the authentication tag, but also on the plaintext valid bit associated
|
||||
// with the fault.
|
||||
//
|
||||
// When decrypting data previously encrypted by the Copy Engine, use
|
||||
// uvm_conf_computing_cpu_decrypt instead.
|
||||
//
|
||||
// Locking: this function must be invoked while holding the replayable ISR lock.
|
||||
NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const void *auth_tag_buffer,
|
||||
NvU8 valid);
|
||||
|
||||
// Increment the CPU-side decrypt IV of the CSL context associated with
|
||||
// replayable faults. The function is a no-op if the given increment is zero.
|
||||
//
|
||||
// The IV associated with a fault CSL context is a 64-bit counter.
|
||||
//
|
||||
// Locking: this function must be invoked while holding the replayable ISR lock.
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
|
||||
#endif // __UVM_CONF_COMPUTING_H__
|
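// Usage sketch (illustrative only, not the driver's actual fault servicing
// logic): one plausible way the two helpers above combine while the
// replayable ISR lock is held. example_consume_fault_entry() and its
// plain/packet/auth_tag/valid/discard parameters are hypothetical.
static NV_STATUS example_consume_fault_entry(uvm_parent_gpu_t *parent_gpu,
                                             void *plain,
                                             const void *packet,
                                             const void *auth_tag,
                                             NvU8 valid,
                                             bool discard)
{
    // An entry dropped without decryption (for example, on a buffer flush)
    // still consumes an IV, so the counter is advanced manually to stay in
    // sync with GSP-RM.
    if (discard) {
        uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
        return NV_OK;
    }

    return uvm_conf_computing_fault_decrypt(parent_gpu, plain, packet, auth_tag, valid);
}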
||||
@@ -37,6 +37,7 @@ typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
|
||||
typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
|
||||
typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
|
||||
typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
|
||||
typedef struct uvm_sec2_hal_struct uvm_sec2_hal_t;
|
||||
typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
|
||||
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
|
||||
@@ -49,6 +50,7 @@ typedef struct uvm_channel_struct uvm_channel_t;
|
||||
typedef struct uvm_user_channel_struct uvm_user_channel_t;
|
||||
typedef struct uvm_push_struct uvm_push_t;
|
||||
typedef struct uvm_push_info_struct uvm_push_info_t;
|
||||
typedef struct uvm_push_crypto_bundle_struct uvm_push_crypto_bundle_t;
|
||||
typedef struct uvm_push_acquire_info_struct uvm_push_acquire_info_t;
|
||||
typedef struct uvm_pushbuffer_struct uvm_pushbuffer_t;
|
||||
typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
|
||||
|
||||
@@ -71,11 +71,6 @@ static void uvm_unregister_callbacks(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void sev_init(const UvmPlatformInfo *platform_info)
|
||||
{
|
||||
g_uvm_global.sev_enabled = platform_info->sevEnabled;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_init(void)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@@ -124,8 +119,7 @@ NV_STATUS uvm_global_init(void)
|
||||
|
||||
uvm_ats_init(&platform_info);
|
||||
g_uvm_global.num_simulated_devices = 0;
|
||||
|
||||
sev_init(&platform_info);
|
||||
g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;
|
||||
|
||||
status = uvm_gpu_init();
|
||||
if (status != NV_OK) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -143,11 +143,16 @@ struct uvm_global_struct
|
||||
struct page *page;
|
||||
} unload_state;
|
||||
|
||||
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
|
||||
// enabled. This field is set once during global initialization
|
||||
// (uvm_global_init), and can be read afterwards without acquiring any
|
||||
// locks.
|
||||
bool sev_enabled;
|
||||
// True if the VM has AMD's SEV, or equivalent HW security extensions such
|
||||
// as Intel's TDX, enabled. The flag is always false on the host.
|
||||
//
|
||||
// This value moves in tandem with that of Confidential Computing in the
|
||||
// GPU(s) in all supported configurations, so it is used as a proxy for the
|
||||
// Confidential Computing state.
|
||||
//
|
||||
// This field is set once during global initialization (uvm_global_init),
|
||||
// and can be read afterwards without acquiring any locks.
|
||||
bool conf_computing_enabled;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
#include "uvm_gpu_access_counters.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_conf_computing.h"
|
||||
|
||||
#include "uvm_linux.h"
|
||||
|
||||
@@ -66,21 +67,6 @@ static uvm_user_channel_t *get_user_channel(uvm_rb_tree_node_t *node)
|
||||
return container_of(node, uvm_user_channel_t, instance_ptr.node);
|
||||
}
|
||||
|
||||
static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
|
||||
parent_gpu->rm_info = *gpu_info;
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
|
||||
snprintf(parent_gpu->name,
|
||||
sizeof(parent_gpu->name),
|
||||
"ID %u: %s: %s",
|
||||
uvm_id_value(parent_gpu->id),
|
||||
parent_gpu->rm_info.name,
|
||||
uuid_buffer);
|
||||
}
|
||||
|
||||
static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
{
|
||||
switch (link_type) {
|
||||
@@ -94,44 +80,68 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
return UVM_GPU_LINK_NVLINK_3;
|
||||
case UVM_LINK_TYPE_NVLINK_4:
|
||||
return UVM_GPU_LINK_NVLINK_4;
|
||||
case UVM_LINK_TYPE_C2C:
|
||||
return UVM_GPU_LINK_C2C;
|
||||
default:
|
||||
return UVM_GPU_LINK_INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS get_gpu_caps(uvm_parent_gpu_t *parent_gpu)
|
||||
static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
|
||||
parent_gpu->rm_info = *gpu_info;
|
||||
|
||||
parent_gpu->system_bus.link = get_gpu_link_type(gpu_info->sysmemLink);
|
||||
UVM_ASSERT(parent_gpu->system_bus.link != UVM_GPU_LINK_INVALID);
|
||||
|
||||
parent_gpu->system_bus.link_rate_mbyte_per_s = gpu_info->sysmemLinkRateMBps;
|
||||
|
||||
if (gpu_info->systemMemoryWindowSize > 0) {
|
||||
// memory_window_end is inclusive but uvm_gpu_is_coherent() checks
|
||||
// memory_window_end > memory_window_start as its condition.
|
||||
UVM_ASSERT(gpu_info->systemMemoryWindowSize > 1);
|
||||
parent_gpu->system_bus.memory_window_start = gpu_info->systemMemoryWindowStart;
|
||||
parent_gpu->system_bus.memory_window_end = gpu_info->systemMemoryWindowStart +
|
||||
gpu_info->systemMemoryWindowSize - 1;
|
||||
}
|
||||
|
||||
parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
|
||||
|
||||
// nvswitch is routed via physical pages, where the upper 13-bits of the
|
||||
// 47-bit address space holds the routing information for each peer.
|
||||
// Currently, this is limited to a 16GB framebuffer window size.
|
||||
if (parent_gpu->nvswitch_info.is_nvswitch_connected)
|
||||
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
|
||||
snprintf(parent_gpu->name,
|
||||
sizeof(parent_gpu->name),
|
||||
"ID %u: %s: %s",
|
||||
uvm_id_value(parent_gpu->id),
|
||||
parent_gpu->rm_info.name,
|
||||
uuid_buffer);
|
||||
}
|
||||
|
||||
static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
UvmGpuCaps gpu_caps;
|
||||
|
||||
memset(&gpu_caps, 0, sizeof(gpu_caps));
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceQueryCaps(parent_gpu->rm_device, &gpu_caps));
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceQueryCaps(uvm_gpu_device_handle(gpu), &gpu_caps));
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
parent_gpu->sysmem_link = get_gpu_link_type(gpu_caps.sysmemLink);
|
||||
UVM_ASSERT(parent_gpu->sysmem_link != UVM_GPU_LINK_INVALID);
|
||||
|
||||
parent_gpu->sysmem_link_rate_mbyte_per_s = gpu_caps.sysmemLinkRateMBps;
|
||||
parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_caps.connectedToSwitch;
|
||||
|
||||
// nvswitch is routed via physical pages, where the upper 13 bits of the
|
||||
// 47-bit address space hold the routing information for each peer.
|
||||
// Currently, this is limited to a 16GB framebuffer window size.
|
||||
if (parent_gpu->nvswitch_info.is_nvswitch_connected)
|
||||
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_caps.nvswitchMemoryWindowStart;
|
||||
|
||||
if (gpu_caps.numaEnabled) {
|
||||
parent_gpu->numa_info.enabled = true;
|
||||
parent_gpu->numa_info.node_id = gpu_caps.numaNodeId;
|
||||
parent_gpu->numa_info.system_memory_window_start = gpu_caps.systemMemoryWindowStart;
|
||||
parent_gpu->numa_info.system_memory_window_end = gpu_caps.systemMemoryWindowStart +
|
||||
gpu_caps.systemMemoryWindowSize -
|
||||
1;
|
||||
UVM_ASSERT(uvm_gpu_is_coherent(gpu->parent));
|
||||
gpu->mem_info.numa.enabled = true;
|
||||
gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(!g_uvm_global.ats.enabled);
|
||||
UVM_ASSERT(!uvm_gpu_is_coherent(gpu->parent));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
@@ -347,26 +357,30 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
|
||||
static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
{
|
||||
NvU32 i;
|
||||
UvmGpuCopyEnginesCaps ces_caps;
|
||||
UvmGpuCopyEnginesCaps *ces_caps;
|
||||
NV_STATUS status;
|
||||
|
||||
memset(&ces_caps, 0, sizeof(ces_caps));
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(gpu), &ces_caps));
|
||||
ces_caps = uvm_kvmalloc_zero(sizeof(*ces_caps));
|
||||
if (!ces_caps) {
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "supported_ces: unavailable (no memory)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(gpu), ces_caps));
|
||||
if (status != NV_OK) {
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "supported_ces: unavailable (query failed)\n");
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "supported_ces:\n");
|
||||
for (i = 0; i < UVM_COPY_ENGINE_COUNT_MAX; ++i) {
|
||||
UvmGpuCopyEngineCaps *ce_caps = ces_caps.copyEngineCaps + i;
|
||||
UvmGpuCopyEngineCaps *ce_caps = ces_caps->copyEngineCaps + i;
|
||||
|
||||
if (!ce_caps->supported)
|
||||
continue;
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u nvlink p2p %u "
|
||||
"p2p %u\n",
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
|
||||
"nvlink p2p %u p2p %u\n",
|
||||
i,
|
||||
ce_caps->cePceMask,
|
||||
ce_caps->grce,
|
||||
@@ -377,6 +391,9 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
ce_caps->nvlinkP2p,
|
||||
ce_caps->p2p);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_kvfree(ces_caps);
|
||||
}
|
||||
|
||||
static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
@@ -394,7 +411,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 6);
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
|
||||
|
||||
switch (link_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
|
||||
@@ -403,6 +420,7 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@@ -410,7 +428,6 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
{
|
||||
const UvmGpuInfo *gpu_info = &gpu->parent->rm_info;
|
||||
uvm_numa_info_t *numa_info = &gpu->parent->numa_info;
|
||||
NvU64 num_pages_in;
|
||||
NvU64 num_pages_out;
|
||||
NvU64 mapped_cpu_pages_size;
|
||||
@@ -429,9 +446,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
return;
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "CPU link type %s\n",
|
||||
uvm_gpu_link_type_string(gpu->parent->sysmem_link));
|
||||
uvm_gpu_link_type_string(gpu->parent->system_bus.link));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "CPU link bandwidth %uMBps\n",
|
||||
gpu->parent->sysmem_link_rate_mbyte_per_s);
|
||||
gpu->parent->system_bus.link_rate_mbyte_per_s);
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "architecture 0x%X\n", gpu_info->gpuArch);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "implementation 0x%X\n", gpu_info->gpuImplementation);
|
||||
@@ -453,13 +470,13 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
gpu->mem_info.max_allocatable_address,
|
||||
gpu->mem_info.max_allocatable_address / (1024 * 1024));
|
||||
|
||||
if (numa_info->enabled) {
|
||||
NvU64 window_size = numa_info->system_memory_window_end - numa_info->system_memory_window_start + 1;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "numa_node_id %u\n", numa_info->node_id);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_start 0x%llx\n",
|
||||
numa_info->system_memory_window_start);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_end 0x%llx\n",
|
||||
numa_info->system_memory_window_end);
|
||||
if (gpu->mem_info.numa.enabled) {
|
||||
NvU64 window_size = gpu->parent->system_bus.memory_window_end - gpu->parent->system_bus.memory_window_start + 1;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "numa_node_id %u\n", uvm_gpu_numa_node(gpu));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "memory_window_start 0x%llx\n",
|
||||
gpu->parent->system_bus.memory_window_start);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "memory_window_end 0x%llx\n",
|
||||
gpu->parent->system_bus.memory_window_end);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_size 0x%llx (%llu MBs)\n",
|
||||
window_size,
|
||||
window_size / (1024 * 1024));
|
||||
@@ -550,6 +567,10 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
|
||||
gpu_info_print_ce_caps(gpu, s);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "dma_buffer_pool_num_buffers %lu\n",
|
||||
gpu->conf_computing.dma_buffer_pool.num_dma_buffers);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -843,7 +864,7 @@ static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
|
||||
proc_remove(peer_caps->procfs.peer_file[1]);
|
||||
}
|
||||
|
||||
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
static NV_STATUS init_semaphore_pools(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *other_gpu;
|
||||
@@ -852,7 +873,17 @@ static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// When the Confidential Computing feature is enabled, a separate secure
|
||||
// pool is created that holds pages allocated in the CPR of vidmem.
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
status = uvm_gpu_semaphore_secure_pool_create(gpu, &gpu->secure_semaphore_pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
for_each_global_gpu(other_gpu) {
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
break;
|
||||
if (other_gpu == gpu)
|
||||
continue;
|
||||
status = uvm_gpu_semaphore_pool_map_gpu(other_gpu->semaphore_pool, gpu);
|
||||
@@ -863,7 +894,7 @@ static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void deinit_semaphore_pool(uvm_gpu_t *gpu)
|
||||
static void deinit_semaphore_pools(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_gpu_t *other_gpu;
|
||||
|
||||
@@ -874,6 +905,7 @@ static void deinit_semaphore_pool(uvm_gpu_t *gpu)
|
||||
}
|
||||
|
||||
uvm_gpu_semaphore_pool_destroy(gpu->semaphore_pool);
|
||||
uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
|
||||
}
|
||||
|
||||
static NV_STATUS find_unused_global_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_global_gpu_id_t *out_id)
|
||||
@@ -1067,6 +1099,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_conf_computing_check_parent_gpu(parent_gpu);
|
||||
|
||||
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
|
||||
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
|
||||
parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
|
||||
@@ -1102,12 +1136,6 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
uvm_mmu_init_gpu_chunk_sizes(parent_gpu);
|
||||
|
||||
status = get_gpu_caps(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_ats_add_gpu(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
@@ -1166,6 +1194,12 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = get_gpu_caps(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_mmu_init_gpu_peer_addresses(gpu);
|
||||
|
||||
status = alloc_and_init_address_space(gpu);
|
||||
@@ -1198,7 +1232,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = init_semaphore_pool(gpu);
|
||||
status = init_semaphore_pools(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize the semaphore pool: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@@ -1228,6 +1262,14 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_conf_computing_gpu_init(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize Confidential Compute: %s for GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = init_procfs_files(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
@@ -1403,6 +1445,8 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
|
||||
// Sync all trackers in PMM
|
||||
uvm_pmm_gpu_sync(&gpu->pmm);
|
||||
|
||||
// Sync all trackers in the GPU's DMA allocation pool
|
||||
uvm_conf_computing_dma_buffer_pool_sync(&gpu->conf_computing.dma_buffer_pool);
|
||||
}
|
||||
|
||||
// Remove all references to the given GPU from its parent, since it is being
|
||||
@@ -1485,7 +1529,7 @@ static void deinit_gpu(uvm_gpu_t *gpu)
|
||||
// pain during development.
|
||||
deconfigure_address_space(gpu);
|
||||
|
||||
deinit_semaphore_pool(gpu);
|
||||
deinit_semaphore_pools(gpu);
|
||||
|
||||
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
|
||||
|
||||
@@ -1536,6 +1580,13 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
if (free_parent)
|
||||
destroy_nvlink_peers(gpu);
|
||||
|
||||
// uvm_mem_free and other uvm_mem APIs invoked by the Confidential Compute
|
||||
// deinitialization must be called before the GPU is removed from the global
|
||||
// table.
|
||||
//
|
||||
// TODO: Bug 2008200: Add and remove the GPU in a more reasonable spot.
|
||||
uvm_conf_computing_gpu_deinit(gpu);
|
||||
|
||||
// TODO: Bug 2844714: If the parent is not being freed, the following
|
||||
// gpu_table_lock is only needed to protect concurrent
|
||||
// find_first_valid_gpu() in BH from the __clear_bit here. After
|
||||
@@ -2213,9 +2264,12 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(p2p_caps_params->p2pLink != UVM_LINK_TYPE_C2C);
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|| peer_caps->link_type == UVM_GPU_LINK_C2C
|
||||
)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
@@ -2225,8 +2279,8 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
peer_caps->is_indirect_peer = (p2p_caps_params->indirectAccess == NV_TRUE);
|
||||
|
||||
if (peer_caps->is_indirect_peer) {
|
||||
UVM_ASSERT(gpu0->parent->numa_info.enabled);
|
||||
UVM_ASSERT(gpu1->parent->numa_info.enabled);
|
||||
UVM_ASSERT(gpu0->mem_info.numa.enabled);
|
||||
UVM_ASSERT(gpu1->mem_info.numa.enabled);
|
||||
|
||||
status = uvm_pmm_gpu_indirect_peer_init(&gpu0->pmm, gpu1);
|
||||
if (status != NV_OK)
|
||||
@@ -2415,8 +2469,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
|
||||
|
||||
// Indirect peers are only supported when onlined as NUMA nodes, because
|
||||
// we want to use vm_insert_page and dma_map_page.
|
||||
if (p2p_caps_params.indirectAccess &&
|
||||
(!gpu->parent->numa_info.enabled || !other_gpu->parent->numa_info.enabled))
|
||||
if (p2p_caps_params.indirectAccess && (!gpu->mem_info.numa.enabled || !other_gpu->mem_info.numa.enabled))
|
||||
continue;
|
||||
|
||||
status = enable_nvlink_peer_access(gpu, other_gpu, &p2p_caps_params);
|
||||
@@ -2601,6 +2654,9 @@ uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,8 @@
|
||||
#include "uvm_rb_tree.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "nv-kthread-q.h"
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include "uvm_conf_computing.h"
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
|
||||
@@ -133,6 +135,12 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// This is set if the page migrated to/from the GPU and CPU.
|
||||
bool did_migrate;
|
||||
|
||||
// Sequence number used to start a mmu notifier read side critical
|
||||
// section.
|
||||
unsigned long notifier_seq;
|
||||
|
||||
struct vm_fault *vmf;
|
||||
} cpu_fault;
|
||||
|
||||
//
|
||||
@@ -168,6 +176,71 @@ struct uvm_service_block_context_struct
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used to return ATS fault status.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. This is used as input to the prefetcher.
|
||||
uvm_page_mask_t faulted_mask;
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
|
||||
// New residency ID of the faulting region.
|
||||
uvm_processor_id_t residency_id;
|
||||
|
||||
// New residency NUMA node ID of the faulting region.
|
||||
int residency_node;
|
||||
|
||||
struct
|
||||
{
|
||||
// True if preferred_location was set on this faulting region.
|
||||
// UVM_VA_BLOCK_SIZE sized region in the faulting region bound by the
|
||||
// VMA is prefetched if preferred_location was set and if first_touch
|
||||
// is true;
|
||||
bool has_preferred_location;
|
||||
|
||||
// True if the UVM_VA_BLOCK_SIZE sized region isn't resident on any
|
||||
// node. False if any page in the region is resident somewhere.
|
||||
bool first_touch;
|
||||
|
||||
// Mask of prefetched pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA.
|
||||
uvm_page_mask_t prefetch_pages_mask;
|
||||
|
||||
// PFN info of the faulting region
|
||||
unsigned long pfns[PAGES_PER_UVM_VA_BLOCK];
|
||||
|
||||
// Faulting/preferred processor residency mask of the faulting region.
|
||||
uvm_page_mask_t residency_mask;
|
||||
|
||||
#if defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
// MMU notifier used to compute residency of this faulting region.
|
||||
struct mmu_interval_notifier notifier;
|
||||
#endif
|
||||
|
||||
uvm_va_space_t *va_space;
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
|
||||
} prefetch_state;
|
||||
|
||||
} uvm_ats_fault_context_t;
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
{
|
||||
// Array of elements fetched from the GPU fault buffer. The number of
|
||||
@@ -200,6 +273,8 @@ struct uvm_fault_service_batch_context_struct
|
||||
|
||||
NvU32 num_replays;
|
||||
|
||||
uvm_ats_fault_context_t ats_context;
|
||||
|
||||
// Unique id (per-GPU) generated for tools events recording
|
||||
NvU32 batch_id;
|
||||
|
||||
@@ -338,6 +413,9 @@ typedef struct
|
||||
// Unique id (per-GPU) generated for tools events recording
|
||||
NvU32 batch_id;
|
||||
|
||||
// Information required to service ATS faults.
|
||||
uvm_ats_fault_context_t ats_context;
|
||||
|
||||
// Information required to invalidate stale ATS PTEs from the GPU TLBs
|
||||
uvm_ats_fault_invalidate_t ats_invalidate;
|
||||
} non_replayable;
|
||||
@@ -349,22 +427,6 @@ typedef struct
|
||||
NvU64 disable_prefetch_faults_timestamp;
|
||||
} uvm_fault_buffer_info_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// True if the platform supports HW coherence (P9) and RM has exposed the
|
||||
// GPU's memory as a NUMA node to the kernel.
|
||||
bool enabled;
|
||||
|
||||
// Range in the system physical address space where the memory of this GPU
|
||||
// is mapped
|
||||
NvU64 system_memory_window_start;
|
||||
NvU64 system_memory_window_end;
|
||||
|
||||
NvU64 memblock_size;
|
||||
|
||||
unsigned node_id;
|
||||
} uvm_numa_info_t;
|
||||
|
||||
struct uvm_access_counter_service_batch_context_struct
|
||||
{
|
||||
uvm_access_counter_buffer_entry_t *notification_cache;
|
||||
@@ -502,6 +564,10 @@ typedef struct
|
||||
|
||||
// Page tables with the mapping.
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
|
||||
// Used during init to indicate whether the mapping has been fully
|
||||
// initialized.
|
||||
bool ready;
|
||||
} uvm_gpu_identity_mapping_t;
|
||||
|
||||
// Root chunk mapping
|
||||
@@ -524,6 +590,7 @@ typedef enum
|
||||
UVM_GPU_LINK_NVLINK_2,
|
||||
UVM_GPU_LINK_NVLINK_3,
|
||||
UVM_GPU_LINK_NVLINK_4,
|
||||
UVM_GPU_LINK_C2C,
|
||||
UVM_GPU_LINK_MAX
|
||||
} uvm_gpu_link_type_t;
|
||||
|
||||
@@ -581,6 +648,14 @@ struct uvm_gpu_struct
|
||||
// Max (inclusive) physical address of this GPU's memory that the driver
|
||||
// can allocate through PMM (PMA).
|
||||
NvU64 max_allocatable_address;
|
||||
|
||||
struct
|
||||
{
|
||||
// True if the platform supports HW coherence and the GPU's memory
|
||||
// is exposed as a NUMA node to the kernel.
|
||||
bool enabled;
|
||||
unsigned int node_id;
|
||||
} numa;
|
||||
} mem_info;
|
||||
|
||||
struct
|
||||
@@ -637,6 +712,8 @@ struct uvm_gpu_struct
|
||||
|
||||
uvm_gpu_semaphore_pool_t *semaphore_pool;
|
||||
|
||||
uvm_gpu_semaphore_pool_t *secure_semaphore_pool;
|
||||
|
||||
uvm_channel_manager_t *channel_manager;
|
||||
|
||||
uvm_pmm_gpu_t pmm;
|
||||
@@ -696,6 +773,25 @@ struct uvm_gpu_struct
|
||||
// mappings (instead of kernel), and it is used in most configurations.
|
||||
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
|
||||
|
||||
// Dummy memory used to store the IV contents during CE encryption.
|
||||
// This memory location is also only available after CE channels
|
||||
// because we use them to write PTEs for allocations such as this one.
|
||||
// This location is used when a physical addressing for the IV buffer
|
||||
// is required. See uvm_hal_hopper_ce_encrypt().
|
||||
uvm_mem_t *iv_mem;
|
||||
|
||||
// Dummy memory used to store the IV contents during CE encryption.
|
||||
// Because of the limitations of `iv_mem', and the need to have such a
|
||||
// buffer at channel initialization, we use an RM allocation.
|
||||
// This location is used when a virtual addressing for the IV buffer
|
||||
// is required. See uvm_hal_hopper_ce_encrypt().
|
||||
uvm_rm_mem_t *iv_rm_mem;
|
||||
} conf_computing;
|
||||
|
||||
// ECC handling
|
||||
// In order to trap ECC errors as soon as possible the driver has the hw
|
||||
// interrupt register mapped directly. If an ECC interrupt is ever noticed
|
||||
@@ -833,6 +929,10 @@ struct uvm_parent_gpu_struct
|
||||
uvm_arch_hal_t *arch_hal;
|
||||
uvm_fault_buffer_hal_t *fault_buffer_hal;
|
||||
uvm_access_counter_buffer_hal_t *access_counter_buffer_hal;
|
||||
uvm_sec2_hal_t *sec2_hal;
|
||||
|
||||
// Whether CE supports physical addressing mode for writes to vidmem
|
||||
bool ce_phys_vidmem_write_supported;
|
||||
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
@@ -954,9 +1054,6 @@ struct uvm_parent_gpu_struct
|
||||
// Fault buffer info. This is only valid if supports_replayable_faults is set to true
|
||||
uvm_fault_buffer_info_t fault_buffer_info;
|
||||
|
||||
// NUMA info, mainly for ATS
|
||||
uvm_numa_info_t numa_info;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
@@ -1049,8 +1146,22 @@ struct uvm_parent_gpu_struct
|
||||
NvU64 fabric_memory_window_start;
|
||||
} nvswitch_info;
|
||||
|
||||
uvm_gpu_link_type_t sysmem_link;
|
||||
NvU32 sysmem_link_rate_mbyte_per_s;
|
||||
struct
|
||||
{
|
||||
// Note that this represents the link to system memory, not the link the
|
||||
// system used to discover the GPU. There are some cases such as NVLINK2
|
||||
// where the GPU is still on the PCIe bus, but it accesses memory over
|
||||
// this link rather than PCIe.
|
||||
uvm_gpu_link_type_t link;
|
||||
NvU32 link_rate_mbyte_per_s;
|
||||
|
||||
// Range in the system physical address space where the memory of this
|
||||
// GPU is exposed as coherent. memory_window_end is inclusive.
|
||||
// memory_window_start == memory_window_end indicates that no window is
|
||||
// present (coherence is not supported).
|
||||
NvU64 memory_window_start;
|
||||
NvU64 memory_window_end;
|
||||
} system_bus;
|
||||
};
|
||||
|
||||
static const char *uvm_gpu_name(uvm_gpu_t *gpu)
|
||||
@@ -1146,23 +1257,20 @@ NV_STATUS uvm_gpu_init_va_space(uvm_va_space_t *va_space);
|
||||
|
||||
void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);
|
||||
|
||||
static uvm_numa_info_t *uvm_gpu_numa_info(uvm_gpu_t *gpu)
|
||||
static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
|
||||
{
|
||||
UVM_ASSERT(gpu->parent->numa_info.enabled);
|
||||
|
||||
return &gpu->parent->numa_info;
|
||||
UVM_ASSERT(gpu->mem_info.numa.enabled);
|
||||
return gpu->mem_info.numa.node_id;
|
||||
}
|
||||
|
||||
static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struct page *page)
|
||||
{
|
||||
uvm_numa_info_t *numa_info = uvm_gpu_numa_info(gpu);
|
||||
|
||||
unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
|
||||
unsigned long gpu_offset = sys_addr - numa_info->system_memory_window_start;
|
||||
unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;
|
||||
|
||||
UVM_ASSERT(page_to_nid(page) == numa_info->node_id);
|
||||
UVM_ASSERT(sys_addr >= numa_info->system_memory_window_start);
|
||||
UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= numa_info->system_memory_window_end);
|
||||
UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
|
||||
UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
|
||||
UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);
|
||||
|
||||
return uvm_gpu_phys_address(UVM_APERTURE_VID, gpu_offset);
|
||||
}
|
||||
@@ -1270,8 +1378,8 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
|
||||
|
||||
if (peer_caps->link_type != UVM_GPU_LINK_INVALID && peer_caps->is_indirect_peer) {
|
||||
UVM_ASSERT(gpu0->parent->numa_info.enabled);
|
||||
UVM_ASSERT(gpu1->parent->numa_info.enabled);
|
||||
UVM_ASSERT(gpu0->mem_info.numa.enabled);
|
||||
UVM_ASSERT(gpu1->mem_info.numa.enabled);
|
||||
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_PCIE);
|
||||
UVM_ASSERT(!uvm_gpus_are_nvswitch_connected(gpu0, gpu1));
|
||||
return true;
|
||||
@@ -1291,6 +1399,9 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu
|
||||
UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
|
||||
UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);
|
||||
|
||||
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
|
||||
UVM_ASSERT(gpu->static_flat_mapping.ready);
|
||||
|
||||
return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
|
||||
}
|
||||
|
||||
@@ -1308,6 +1419,23 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu
|
||||
return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
|
||||
}
|
||||
|
||||
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
|
||||
// for CE access.
|
||||
static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
|
||||
{
|
||||
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
|
||||
|
||||
if (phys_addr.aperture == UVM_APERTURE_VID) {
|
||||
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
|
||||
return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
|
||||
}
|
||||
else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
|
||||
return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
|
||||
}
|
||||
|
||||
return uvm_gpu_address_from_phys(phys_addr);
|
||||
}
|
||||
|
||||
static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_gpu_id_t peer_id)
|
||||
{
|
||||
return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
|
||||
@@ -1383,6 +1511,11 @@ bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
// addresses.
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
|
||||
|
||||
static bool uvm_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
|
||||
}
|
||||
|
||||
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
|
||||
{
|
||||
return gpu->parent->max_host_va > (1ull << 40);
|
||||
@@ -1446,6 +1579,7 @@ typedef enum
|
||||
{
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
|
||||
} uvm_gpu_buffer_flush_mode_t;
|
||||
|
||||
#endif // __UVM_GPU_H__
|
||||
|
||||
@@ -210,13 +210,13 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
|
||||
*bytes = 64 * 1024ULL;
|
||||
break;
|
||||
case UVM_ACCESS_COUNTER_GRANULARITY_2M:
|
||||
*bytes = 2 * 1024 * 1024ULL;
|
||||
*bytes = 2 * UVM_SIZE_1MB;
|
||||
break;
|
||||
case UVM_ACCESS_COUNTER_GRANULARITY_16M:
|
||||
*bytes = 16 * 1024 * 1024ULL;
|
||||
*bytes = 16 * UVM_SIZE_1MB;
|
||||
break;
|
||||
case UVM_ACCESS_COUNTER_GRANULARITY_16G:
|
||||
*bytes = 16 * 1024 * 1024 * 1024ULL;
|
||||
*bytes = 16 * UVM_SIZE_1GB;
|
||||
break;
|
||||
default:
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
@@ -404,7 +404,8 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceInitAccessCntrInfo(parent_gpu->rm_device,
|
||||
&access_counters->rm_info));
|
||||
&access_counters->rm_info,
|
||||
0));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@@ -707,6 +708,7 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
|
||||
// Read PUT pointer from the GPU if requested
|
||||
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
|
||||
if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT)
|
||||
access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);
|
||||
|
||||
@@ -1007,6 +1009,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
|
||||
bool read_duplicate = false;
|
||||
uvm_processor_id_t new_residency;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
// Ensure that the migratability iterator covers the current address
|
||||
while (iter.end < address)
|
||||
@@ -1033,21 +1036,23 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
|
||||
// If the underlying VMA is gone, skip HMM migrations.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_find_vma(&service_context->block_context, address);
|
||||
status = uvm_hmm_find_vma(service_context->block_context.mm,
|
||||
&service_context->block_context.hmm.vma,
|
||||
address);
|
||||
if (status == NV_ERR_INVALID_ADDRESS)
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
|
||||
policy = uvm_va_policy_get(va_block, address);
|
||||
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
processor,
|
||||
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
|
||||
service_context->block_context.policy,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
|
||||
&read_duplicate);
|
||||
@@ -1092,12 +1097,17 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
|
||||
while (first_page_index <= last_page_index) {
|
||||
uvm_page_index_t outer = last_page_index + 1;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_find_policy_vma_and_outer(va_block,
|
||||
&service_context->block_context,
|
||||
first_page_index,
|
||||
&outer);
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
if (service_context->block_context.mm) {
|
||||
status = uvm_hmm_find_policy_vma_and_outer(va_block,
|
||||
&service_context->block_context.hmm.vma,
|
||||
first_page_index,
|
||||
&policy,
|
||||
&outer);
|
||||
}
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
@@ -1198,6 +1208,11 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context.mm = mm;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
uvm_hmm_migrate_begin_wait(va_block);
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
|
||||
@@ -1211,6 +1226,9 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
uvm_hmm_migrate_finish(va_block);
|
||||
|
||||
if (status == NV_OK)
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
|
||||
@@ -85,76 +85,86 @@ static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
return 0;
|
||||
|
||||
// handling gets set to false for all handlers during removal, so quit if
|
||||
// the GPU is in the process of being removed.
|
||||
if (parent_gpu->isr.replayable_faults.handling) {
|
||||
if (!parent_gpu->isr.replayable_faults.handling)
|
||||
return 0;
|
||||
|
||||
// Use raw call instead of UVM helper. Ownership will be recorded in the
|
||||
// bottom half. See comment replayable_faults_isr_bottom_half().
|
||||
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) == 0) {
|
||||
if (uvm_gpu_replayable_faults_pending(parent_gpu)) {
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
// Use raw call instead of UVM helper. Ownership will be recorded in the
|
||||
// bottom half. See comment replayable_faults_isr_bottom_half().
|
||||
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
|
||||
return 0;
|
||||
|
||||
// Interrupts need to be disabled here to avoid an interrupt
|
||||
// storm
|
||||
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
|
||||
// Schedule a bottom half, but do *not* release the GPU ISR
|
||||
// lock. The bottom half releases the GPU ISR lock as part of
|
||||
// its cleanup.
|
||||
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
|
||||
&parent_gpu->isr.replayable_faults.bottom_half_q_item);
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
|
||||
}
|
||||
}
|
||||
if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
|
||||
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
|
||||
// Interrupts need to be disabled here to avoid an interrupt storm
|
||||
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
|
||||
// Schedule a bottom half, but do *not* release the GPU ISR lock. The bottom
|
||||
// half releases the GPU ISR lock as part of its cleanup.
|
||||
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
|
||||
&parent_gpu->isr.replayable_faults.bottom_half_q_item);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
bool scheduled;
|
||||
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
return 0;
|
||||
|
||||
// handling gets set to false for all handlers during removal, so quit if
|
||||
// the GPU is in the process of being removed.
|
||||
if (parent_gpu->isr.non_replayable_faults.handling) {
|
||||
// Non-replayable_faults are stored in a synchronized circular queue
|
||||
// shared by RM/UVM. Therefore, we can query the number of pending
|
||||
// faults. This type of faults are not replayed and since RM advances
|
||||
// GET to PUT when copying the fault packets to the queue, no further
|
||||
// interrupts will be triggered by the gpu and faults may stay
|
||||
// unserviced. Therefore, if there is a fault in the queue, we schedule
|
||||
// a bottom half unconditionally.
|
||||
if (uvm_gpu_non_replayable_faults_pending(parent_gpu)) {
|
||||
bool scheduled;
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
if (!parent_gpu->isr.non_replayable_faults.handling)
|
||||
return 0;
|
||||
|
||||
scheduled = nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
|
||||
&parent_gpu->isr.non_replayable_faults.bottom_half_q_item) != 0;
|
||||
// Non-replayable_faults are stored in a synchronized circular queue
|
||||
// shared by RM/UVM. Therefore, we can query the number of pending
|
||||
// faults. This type of faults are not replayed and since RM advances
|
||||
// GET to PUT when copying the fault packets to the queue, no further
|
||||
// interrupts will be triggered by the gpu and faults may stay
|
||||
// unserviced. Therefore, if there is a fault in the queue, we schedule
|
||||
// a bottom half unconditionally.
|
||||
if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
|
||||
return 0;
|
||||
|
||||
// If the q_item did not get scheduled because it was already
|
||||
// queued, that instance will handle the pending faults. Just
|
||||
// drop the GPU kref.
|
||||
if (!scheduled)
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
scheduled = nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
|
||||
&parent_gpu->isr.non_replayable_faults.bottom_half_q_item) != 0;
|
||||
|
||||
return 0;
|
||||
// If the q_item did not get scheduled because it was already
|
||||
// queued, that instance will handle the pending faults. Just
|
||||
// drop the GPU kref.
|
||||
if (!scheduled)
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
return 0;
|
||||
|
||||
if (!parent_gpu->isr.access_counters.handling_ref_count)
|
||||
return 0;
|
||||
|
||||
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem))
|
||||
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
|
||||
return 0;
|
||||
|
||||
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
|
||||
@@ -199,7 +209,7 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
unsigned num_handlers_scheduled = 0;
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (!in_interrupt() && in_atomic()) {
|
||||
// Early-out if we're not in interrupt context, but memory allocations
|
||||
@@ -238,18 +248,15 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
|
||||
|
||||
++parent_gpu->isr.interrupt_count;
|
||||
|
||||
if (parent_gpu->isr.is_suspended) {
|
||||
status = NV_ERR_NO_INTR_PENDING;
|
||||
}
|
||||
else {
|
||||
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
|
||||
|
||||
if (num_handlers_scheduled == 0)
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
if (num_handlers_scheduled == 0) {
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
status = NV_ERR_NO_INTR_PENDING;
|
||||
else
|
||||
status = NV_OK;
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
}
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
@@ -511,6 +518,9 @@ static void replayable_faults_isr_bottom_half(void *args)
|
||||
uvm_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
// It is OK to drop a reference on the parent GPU if a bottom half has
|
||||
// been retriggered within uvm_gpu_replayable_faults_isr_unlock, because the
|
||||
// rescheduling added an additional reference.
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
}
|
||||
|
||||
@@ -591,6 +601,51 @@ static void access_counters_isr_bottom_half_entry(void *args)
|
||||
UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
|
||||
}
|
||||
|
||||
static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
bool retrigger = false;
|
||||
|
||||
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
|
||||
// the replayable fault interrupt by updating GET. This is because, in this
|
||||
// configuration, GET is a dummy register used to inform GSP-RM (the owner
|
||||
// of the HW replayable fault buffer) of the latest entry consumed by the
|
||||
// UVM driver. The real GET register is owned by GSP-RM.
|
||||
//
|
||||
// The retriggering of a replayable faults bottom half happens then
|
||||
// manually, by scheduling a bottom half for later if there is any pending
|
||||
// work in the fault buffer accessible by UVM. The retriggering adddresses
|
||||
// two problematic scenarios caused by GET updates not setting any
|
||||
// interrupt:
|
||||
//
|
||||
// (1) UVM didn't process all the entries up to cached PUT
|
||||
//
|
||||
// (2) UVM did process all the entries up to cached PUT, but GPS-RM
|
||||
// added new entries such that cached PUT is out-of-date
|
||||
//
|
||||
// In both cases, re-enablement of interrupts would have caused the
|
||||
// replayable fault to be triggered in a non-CC setup, because the updated
|
||||
// value of GET is different from PUT. But this is not the case in Confidential
|
||||
// Computing, so a bottom half needs to be manually scheduled in order to
|
||||
// ensure that all faults are serviced.
|
||||
//
|
||||
// While in the typical case the retriggering happens within a replayable
|
||||
// fault bottom half, it can also happen within a non-interrupt path such as
|
||||
// uvm_gpu_fault_buffer_flush.
|
||||
if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
|
||||
retrigger = true;
|
||||
|
||||
if (!retrigger)
|
||||
return;
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
// If there is pending work, schedule a replayable faults bottom
|
||||
// half. It is valid for a bottom half (q_item) to reschedule itself.
|
||||
(void) schedule_replayable_faults_handler(parent_gpu);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
}
|
||||
|
||||
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
@@ -632,9 +687,9 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
// service_lock mutex is released.
|
||||
|
||||
if (parent_gpu->isr.replayable_faults.handling) {
|
||||
// Turn page fault interrupts back on, unless remove_gpu() has already removed this GPU
|
||||
// from the GPU table. remove_gpu() indicates that situation by setting
|
||||
// gpu->replayable_faults.handling to false.
|
||||
// Turn page fault interrupts back on, unless remove_gpu() has already
|
||||
// removed this GPU from the GPU table. remove_gpu() indicates that
|
||||
// situation by setting gpu->replayable_faults.handling to false.
|
||||
//
|
||||
// This path can only be taken from the bottom half. User threads
|
||||
// calling this function must have previously retained the GPU, so they
|
||||
@@ -671,6 +726,8 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_up_out_of_order(&parent_gpu->isr.replayable_faults.service_lock);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
replayable_faults_retrigger_bottom_half(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
@@ -177,31 +177,34 @@ bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
return has_pending_faults == NV_TRUE;
}

static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu, NvU32 *cached_faults)
{
NV_STATUS status;
NvU32 i = 0;
NvU32 cached_faults = 0;
uvm_fault_buffer_entry_t *fault_cache;
NvU32 entry_size = gpu->parent->fault_buffer_hal->entry_size(gpu->parent);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
NvU32 i;
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;

fault_cache = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.non_replayable_faults.service_lock));
UVM_ASSERT(gpu->parent->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
current_hw_entry,
cached_faults);

status = nvUvmInterfaceGetNonReplayableFaults(&gpu->parent->fault_buffer_info.rm_info,
non_replayable_faults->shadow_buffer_copy,
&cached_faults);
UVM_ASSERT(status == NV_OK);
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceGetNonReplayableFaults() failed: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);

uvm_global_set_fatal_error(status);
return status;
}

// Parse all faults
for (i = 0; i < cached_faults; ++i) {
uvm_fault_buffer_entry_t *fault_entry = &non_replayable_faults->fault_cache[i];

gpu->parent->fault_buffer_hal->parse_non_replayable_entry(gpu->parent, current_hw_entry, fault_entry);
for (i = 0; i < *cached_faults; ++i) {
parent_gpu->fault_buffer_hal->parse_non_replayable_entry(parent_gpu, current_hw_entry, fault_entry);

// The GPU aligns the fault addresses to 4k, but all of our tracking is
// done in PAGE_SIZE chunks which might be larger.
@@ -226,9 +229,10 @@ static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
}

current_hw_entry += entry_size;
fault_entry++;
}

return cached_faults;
return NV_OK;
}

// In SRIOV, the UVM (guest) driver does not have access to the privileged
@@ -339,6 +343,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
const uvm_va_policy_t *policy;

UVM_ASSERT(!fault_entry->is_fatal);

@@ -348,7 +353,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
UVM_ASSERT(fault_entry->fault_address <= va_block->end);

service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
policy = uvm_va_policy_get(va_block, fault_entry->fault_address);

if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics. For now we use a
@@ -357,7 +362,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
va_block,
gpu->id,
service_context->block_context.policy->preferred_location,
policy->preferred_location,
fault_entry,
++non_replayable_faults->batch_id,
false);
@@ -392,7 +397,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
page_index,
gpu->id,
fault_entry->access_type_mask,
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -435,6 +440,11 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;

if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_service_context_init(service_context);
uvm_hmm_migrate_begin_wait(va_block);
}

uvm_mutex_lock(&va_block->lock);

status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
@@ -449,6 +459,9 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,

uvm_mutex_unlock(&va_block->lock);

if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_finish(va_block);

return status == NV_OK? tracker_status: status;
}

@@ -512,6 +525,14 @@ static void schedule_kill_channel(uvm_gpu_t *gpu,
&user_channel->kill_channel.kill_channel_q_item);
}

static void service_fault_fatal(uvm_fault_buffer_entry_t *fault_entry, NV_STATUS status)
{
UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);

fault_entry->is_fatal = true;
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
}

static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_fault_buffer_entry_t *fault_entry,
@@ -521,6 +542,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;

UVM_ASSERT(!fault_entry->is_fatal);

@@ -537,27 +559,63 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
return status;

if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
struct vm_area_struct *vma;
uvm_va_range_t *va_range_next;
NvU64 fault_address = fault_entry->fault_address;
uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;

uvm_page_mask_zero(&ats_context->read_fault_mask);
uvm_page_mask_zero(&ats_context->write_fault_mask);

ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;

ats_invalidate->write_faults_in_batch = false;

// The VA isn't managed. See if ATS knows about it.
status = uvm_ats_service_fault_entry(gpu_va_space, fault_entry, ats_invalidate);
va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);

// Invalidate ATS TLB entries if needed
if (status == NV_OK) {
status = uvm_ats_invalidate_tlbs(gpu_va_space,
ats_invalidate,
&non_replayable_faults->fault_service_tracker);
// The VA isn't managed. See if ATS knows about it.
vma = find_vma_intersection(mm, fault_address, fault_address + 1);
if (!vma || uvm_ats_check_in_gmmu_region(gpu_va_space->va_space, fault_address, va_range_next)) {

// Do not return error due to logical errors in the application
status = NV_OK;
}
else {
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(fault_address);
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_index_t page_index = (fault_address - base) / PAGE_SIZE;
uvm_page_mask_t *fault_mask = (fault_access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) ?
&ats_context->write_fault_mask :
&ats_context->read_fault_mask;

uvm_page_mask_set(fault_mask, page_index);

status = uvm_ats_service_faults(gpu_va_space, vma, base, ats_context);
if (status == NV_OK) {
// Invalidate ATS TLB entries if needed
if (uvm_page_mask_test(faults_serviced_mask, page_index)) {
status = uvm_ats_invalidate_tlbs(gpu_va_space,
ats_invalidate,
&non_replayable_faults->fault_service_tracker);
fatal_fault_status = NV_OK;
}
}
else {
fatal_fault_status = status;
}
}
}
else {
UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);
fault_entry->is_fatal = true;
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
fatal_fault_status = status;

// Do not return error due to logical errors in the application
status = NV_OK;
}

if (fatal_fault_status != NV_OK)
service_fault_fatal(fault_entry, fatal_fault_status);

return status;
}

@@ -621,10 +679,17 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
fault_entry->fault_source.channel_id = user_channel->hw_channel_id;

if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
fault_entry->fault_address,
va_block_context,
&va_block);
if (mm) {
status = uvm_va_block_find_create(fault_entry->va_space,
fault_entry->fault_address,
&va_block_context->hmm.vma,
&va_block);
}
else {
status = uvm_va_block_find_create_managed(fault_entry->va_space,
fault_entry->fault_address,
&va_block);
}
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
else
@@ -652,31 +717,35 @@ exit_no_channel:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);

if (status != NV_OK)
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));

return status;
}

void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
NvU32 cached_faults;

// If this handler is modified to handle fewer than all of the outstanding
// faults, then special handling will need to be added to uvm_suspend()
// to guarantee that fault processing has completed before control is
// returned to the RM.
while ((cached_faults = fetch_non_replayable_fault_buffer_entries(gpu)) > 0) {
do {
NV_STATUS status;
NvU32 i;

status = fetch_non_replayable_fault_buffer_entries(gpu->parent, &cached_faults);
if (status != NV_OK)
return;

// Differently to replayable faults, we do not batch up and preprocess
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
for (i = 0; i < cached_faults; ++i) {
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
if (status != NV_OK)
break;
return;
}
}

if (status != NV_OK)
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
} while (cached_faults > 0);
}

File diff suppressed because it is too large
@@ -75,4 +75,7 @@ void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// only called from the ISR bottom half
void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);

// Returns true if UVM owns the hardware replayable fault buffer
bool uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(uvm_parent_gpu_t *parent_gpu);

#endif // __UVM_GPU_PAGE_FAULT_H__

@@ -26,6 +26,7 @@
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
|
||||
#include "uvm_conf_computing.h"
|
||||
|
||||
#define UVM_SEMAPHORE_SIZE 4
|
||||
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
|
||||
@@ -44,6 +45,9 @@ struct uvm_gpu_semaphore_pool_struct
|
||||
// List of all the semaphore pages belonging to the pool
|
||||
struct list_head pages;
|
||||
|
||||
// Pages aperture.
|
||||
uvm_aperture_t aperture;
|
||||
|
||||
// Count of free semaphores among all the pages
|
||||
NvU32 free_semaphores_count;
|
||||
|
||||
@@ -66,11 +70,24 @@ struct uvm_gpu_semaphore_pool_page_struct
|
||||
DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
};
|
||||
|
||||
static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
|
||||
}
|
||||
|
||||
static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
return gpu_semaphore_pool_is_secure(semaphore->page->pool);
|
||||
}
|
||||
|
||||
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU32 offset;
|
||||
NvU32 index;
|
||||
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
return semaphore->conf_computing.index;
|
||||
|
||||
UVM_ASSERT(semaphore->payload != NULL);
|
||||
UVM_ASSERT(semaphore->page != NULL);
|
||||
|
||||
@@ -118,6 +135,14 @@ static bool is_canary(NvU32 val)
|
||||
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
|
||||
}
|
||||
|
||||
static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
// A pool allocated in the CPR of vidmem cannot be read/written from the
|
||||
// CPU.
|
||||
return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
|
||||
return UVM_IS_DEBUG();
|
||||
}
|
||||
|
||||
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
|
||||
// pool?
|
||||
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
|
||||
@@ -125,13 +150,34 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
|
||||
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
|
||||
}
|
||||
|
||||
// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
|
||||
// the owning GPU as no other processor have access to it.
|
||||
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
|
||||
uvm_gpu_semaphore_pool_page_t *pool_page,
|
||||
uvm_rm_mem_type_t memory_type)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
|
||||
status = uvm_rm_mem_alloc(pool->gpu,
|
||||
memory_type,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&pool_page->memory);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_semaphore_pool_page_t *pool_page;
|
||||
NvU32 *payloads;
|
||||
size_t i;
|
||||
uvm_rm_mem_type_t rm_mem_type = UVM_RM_MEM_TYPE_SYS;
|
||||
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
|
||||
|
||||
uvm_assert_mutex_locked(&pool->mutex);
|
||||
|
||||
@@ -142,13 +188,24 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
|
||||
pool_page->pool = pool;
|
||||
|
||||
// Whenever the Confidential Computing feature is enabled, engines can
|
||||
// access semaphores only in the CPR of vidmem. Mapping to other GPUs is
|
||||
// also disabled.
|
||||
if (gpu_semaphore_pool_is_secure(pool)) {
|
||||
status = pool_alloc_secure_page(pool, pool_page, memory_type);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
|
||||
rm_mem_type,
|
||||
memory_type,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
0,
|
||||
&pool_page->memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Verify the GPU can access the semaphore pool.
|
||||
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
|
||||
@@ -159,8 +216,7 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
list_add(&pool_page->all_pages_node, &pool->pages);
|
||||
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
|
||||
|
||||
// Initialize the semaphore payloads to known values
|
||||
if (UVM_IS_DEBUG()) {
|
||||
if (semaphore_uses_canary(pool)) {
|
||||
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
|
||||
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
|
||||
payloads[i] = make_canary(0);
|
||||
@@ -176,8 +232,6 @@ error:
|
||||
static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
|
||||
{
|
||||
uvm_gpu_semaphore_pool_t *pool;
|
||||
NvU32 *payloads;
|
||||
size_t i;
|
||||
|
||||
UVM_ASSERT(page);
|
||||
pool = page->pool;
|
||||
@@ -190,9 +244,9 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
|
||||
"count: %u\n",
|
||||
pool->free_semaphores_count);
|
||||
|
||||
// Check for semaphore release-after-free
|
||||
if (UVM_IS_DEBUG()) {
|
||||
payloads = uvm_rm_mem_get_cpu_va(page->memory);
|
||||
if (semaphore_uses_canary(pool)) {
|
||||
size_t i;
|
||||
NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
|
||||
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
|
||||
UVM_ASSERT(is_canary(payloads[i]));
|
||||
}
|
||||
@@ -223,11 +277,18 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
|
||||
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
|
||||
continue;
|
||||
|
||||
semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) + semaphore_index * UVM_SEMAPHORE_SIZE);
|
||||
if (gpu_semaphore_pool_is_secure(pool)) {
|
||||
semaphore->conf_computing.index = semaphore_index;
|
||||
}
|
||||
else {
|
||||
semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
|
||||
semaphore_index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
semaphore->page = page;
|
||||
|
||||
// Check for semaphore release-after-free
|
||||
UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
|
||||
if (semaphore_uses_canary(pool))
|
||||
UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
|
||||
|
||||
uvm_gpu_semaphore_set_payload(semaphore, 0);
|
||||
|
||||
@@ -266,7 +327,7 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
|
||||
|
||||
// Write a known value lower than the current payload in an attempt to catch
|
||||
// release-after-free and acquire-after-free.
|
||||
if (UVM_IS_DEBUG())
|
||||
if (semaphore_uses_canary(pool))
|
||||
uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));
|
||||
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
@@ -294,12 +355,26 @@ NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t
|
||||
|
||||
pool->free_semaphores_count = 0;
|
||||
pool->gpu = gpu;
|
||||
pool->aperture = UVM_APERTURE_SYS;
|
||||
|
||||
*pool_out = pool;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
|
||||
status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
|
||||
if (status == NV_OK)
|
||||
(*pool_out)->aperture = UVM_APERTURE_VID;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
uvm_gpu_semaphore_pool_page_t *page;
|
||||
@@ -375,13 +450,16 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
|
||||
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
|
||||
{
|
||||
NvU32 index = get_index(semaphore);
|
||||
NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space);
|
||||
NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;
|
||||
|
||||
return base_va + UVM_SEMAPHORE_SIZE * index;
|
||||
}
|
||||
|
||||
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);
|
||||
|
||||
return UVM_GPU_READ_ONCE(*semaphore->payload);
|
||||
}
|
||||
|
||||
@@ -398,6 +476,10 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
|
||||
// being optimized out on non-SMP configs (we need them for interacting with
|
||||
// the GPU correctly even on non-SMP).
|
||||
mb();
|
||||
|
||||
if (gpu_semaphore_is_secure(semaphore))
|
||||
UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
|
||||
else
|
||||
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
|
||||
}
|
||||
|
||||
@@ -425,9 +507,22 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
|
||||
return true;
|
||||
}
|
||||
|
||||
bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
|
||||
{
|
||||
uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;
|
||||
|
||||
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;
|
||||
|
||||
memset(tracking_sem, 0, sizeof(*tracking_sem));
|
||||
|
||||
@@ -437,7 +532,14 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
|
||||
|
||||
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
|
||||
|
||||
uvm_spin_lock_init(&tracking_sem->lock, UVM_LOCK_ORDER_LEAF);
|
||||
if (uvm_conf_computing_mode_enabled(pool->gpu))
|
||||
order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
|
||||
|
||||
if (tracking_semaphore_uses_mutex(tracking_sem))
|
||||
uvm_mutex_init(&tracking_sem->m_lock, order);
|
||||
else
|
||||
uvm_spin_lock_init(&tracking_sem->s_lock, order);
|
||||
|
||||
atomic64_set(&tracking_sem->completed_value, 0);
|
||||
tracking_sem->queued_value = 0;
|
||||
|
||||
@@ -449,15 +551,119 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
|
||||
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
|
||||
}
|
||||
|
||||
static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
|
||||
{
|
||||
// No new value, or the GPU is currently writing the new encrypted material
|
||||
// and no change in value would still result in corrupted data.
|
||||
return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
|
||||
}
|
||||
|
||||
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
UvmCslIv local_iv;
|
||||
NvU32 local_payload;
|
||||
NvU32 new_sem_value;
|
||||
NvU32 gpu_notifier;
|
||||
NvU32 last_observed_notifier;
|
||||
NvU32 new_gpu_notifier = 0;
|
||||
NvU32 iv_index = 0;
|
||||
|
||||
// A channel can have multiple entries pending and the tracking semaphore
|
||||
// update of each entry can race with this function. Since the semaphore
|
||||
// needs to be updated to release a used entry, we never need more
|
||||
// than 'num_gpfifo_entries' re-tries.
|
||||
unsigned tries_left = channel->num_gpfifo_entries;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
|
||||
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
|
||||
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
|
||||
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
|
||||
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
|
||||
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
|
||||
|
||||
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
|
||||
return;
|
||||
|
||||
do {
|
||||
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
|
||||
// Odd notifier value means there's an update in progress.
|
||||
if (gpu_notifier % 2)
|
||||
continue;
|
||||
|
||||
// Make sure no memory accesses happen before we read the notifier
|
||||
smp_mb__after_atomic();
|
||||
|
||||
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
|
||||
memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
|
||||
local_payload = UVM_READ_ONCE(*payload_cpu_addr);
|
||||
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
|
||||
|
||||
// Make sure the second read of notifier happens after
|
||||
// all memory accesses.
|
||||
smp_mb__before_atomic();
|
||||
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
tries_left--;
|
||||
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
|
||||
|
||||
if (!tries_left) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (gpu_notifier == new_gpu_notifier) {
|
||||
status = uvm_conf_computing_cpu_decrypt(channel,
|
||||
&new_sem_value,
|
||||
&local_payload,
|
||||
&local_iv,
|
||||
sizeof(new_sem_value),
|
||||
&local_auth_tag);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
|
||||
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
error:
|
||||
// Decryption failure is a fatal error as well as running out of try left.
|
||||
// Upon testing, all decryption happened within one try, anything that
|
||||
// would require ten retry would be considered active tampering with the
|
||||
// data structures.
|
||||
uvm_global_set_fatal_error(status);
|
||||
}
|
||||
|
||||
static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
|
||||
{
|
||||
NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
|
||||
// The semaphore value is the bottom 32 bits of completed_value
|
||||
NvU32 old_sem_value = (NvU32)old_value;
|
||||
NvU32 new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
|
||||
NvU32 new_sem_value;
|
||||
NvU64 new_value;
|
||||
|
||||
uvm_assert_spinlock_locked(&tracking_semaphore->lock);
|
||||
if (tracking_semaphore_uses_mutex(tracking_semaphore))
|
||||
uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
|
||||
else
|
||||
uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);
|
||||
|
||||
if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
|
||||
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
|
||||
// mechanism to all semaphore
|
||||
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
|
||||
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
|
||||
}
|
||||
|
||||
new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
|
||||
|
||||
// The following logic to update the completed value is very subtle, it
|
||||
// helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
|
||||
@@ -466,7 +672,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
if (old_sem_value == new_sem_value) {
|
||||
// No progress since the last update.
|
||||
// No additional memory barrier required in this case as completed_value
|
||||
// is always updated under the spinlock that this thread just acquired.
|
||||
// is always updated under the lock that this thread just acquired.
|
||||
// That guarantees full ordering with all the accesses the thread that
|
||||
// updated completed_value did under the lock including the GPU
|
||||
// semaphore read.
|
||||
@@ -493,7 +699,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
|
||||
old_value, new_value);
|
||||
|
||||
// Use an atomic write even though the spinlock is held so that the value can
|
||||
// Use an atomic write even though the lock is held so that the value can
|
||||
// be (carefully) read atomically outside of the lock.
|
||||
//
|
||||
// atomic64_set() on its own doesn't imply any memory barriers and we need
|
||||
@@ -521,9 +727,9 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
// guarantees that no accesses will be ordered above the atomic (and hence
|
||||
// the GPU semaphore read).
|
||||
//
|
||||
// Notably the soon following uvm_spin_unlock() is a release barrier that
|
||||
// allows later memory accesses to be reordered above it and hence doesn't
|
||||
// provide the necessary ordering with the GPU semaphore read.
|
||||
// Notably the soon following unlock is a release barrier that allows later
|
||||
// memory accesses to be reordered above it and hence doesn't provide the
|
||||
// necessary ordering with the GPU semaphore read.
|
||||
//
|
||||
// Also notably this would still need to be handled if we ever switch to
|
||||
// atomic64_set_release() and atomic64_read_acquire() for accessing
|
||||
@@ -540,11 +746,17 @@ NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semapho
|
||||
// Check that the GPU which owns the semaphore is still present
|
||||
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
|
||||
|
||||
uvm_spin_lock(&tracking_semaphore->lock);
|
||||
if (tracking_semaphore_uses_mutex(tracking_semaphore))
|
||||
uvm_mutex_lock(&tracking_semaphore->m_lock);
|
||||
else
|
||||
uvm_spin_lock(&tracking_semaphore->s_lock);
|
||||
|
||||
completed = update_completed_value_locked(tracking_semaphore);
|
||||
|
||||
uvm_spin_unlock(&tracking_semaphore->lock);
|
||||
if (tracking_semaphore_uses_mutex(tracking_semaphore))
|
||||
uvm_mutex_unlock(&tracking_semaphore->m_lock);
|
||||
else
|
||||
uvm_spin_unlock(&tracking_semaphore->s_lock);
|
||||
|
||||
return completed;
|
||||
}
|
||||
|
||||
@@ -47,6 +47,16 @@ struct uvm_gpu_semaphore_struct

// Pointer to the memory location
NvU32 *payload;
struct {
NvU16 index;
NvU32 cached_payload;
uvm_rm_mem_t *encrypted_payload;
uvm_rm_mem_t *notifier;
uvm_rm_mem_t *auth_tag;
UvmCslIv *ivs;
NvU32 last_pushed_notifier;
NvU32 last_observed_notifier;
} conf_computing;
};

// A primitive used for tracking progress of the GPU
@@ -67,7 +77,10 @@ struct uvm_gpu_tracking_semaphore_struct
atomic64_t completed_value;

// Lock protecting updates to the completed_value
uvm_spinlock_t lock;
union {
uvm_spinlock_t s_lock;
uvm_mutex_t m_lock;
};

// Last queued value
// All accesses to the queued value should be handled by the user of the GPU
@@ -78,6 +91,12 @@ struct uvm_gpu_tracking_semaphore_struct
// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

// When the Confidential Computing feature is enabled, semaphore pools
// associated with CE channels are allocated in the CPR of vidmem and as such
// have all the associated access restrictions. Because of this, they're called
// secure pools and secure semaphores are allocated out of said secure pools.
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

// Destroy a semaphore pool
// Locking:
// - Global lock needs to be held in read mode (for unmapping from all GPUs)
@@ -90,6 +109,9 @@ void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
// Allocate a semaphore from the pool.
// The semaphore will be mapped on all GPUs currently registered with the UVM
// driver, and on all new GPUs which will be registered in the future.
// Unless the Confidential Computing feature is enabled and the pool is a
// secure pool. In this case, it is only mapped to the GPU that holds the
// allocation.
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
//

@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -53,6 +53,7 @@ MODULE_PARM_DESC(uvm_downgrade_force_membar_sys, "Force all TLB invalidation dow
|
||||
#define ARCH_OP_COUNT (sizeof(uvm_arch_hal_t) / sizeof(void *))
|
||||
#define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
|
||||
#define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
|
||||
#define SEC2_OP_COUNT (sizeof(uvm_sec2_hal_t) / sizeof(void *))
|
||||
|
||||
// Table for copy engine functions.
|
||||
// Each entry is associated with a copy engine class through the 'class' field.
|
||||
@@ -73,6 +74,7 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
|
||||
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
|
||||
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
|
||||
.memcopy_copy_type = uvm_hal_maxwell_ce_memcopy_copy_type,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memcopy = uvm_hal_maxwell_ce_memcopy,
|
||||
@@ -82,6 +84,8 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.memset_4 = uvm_hal_maxwell_ce_memset_4,
|
||||
.memset_8 = uvm_hal_maxwell_ce_memset_8,
|
||||
.memset_v_4 = uvm_hal_maxwell_ce_memset_v_4,
|
||||
.encrypt = uvm_hal_maxwell_ce_encrypt_unsupported,
|
||||
.decrypt = uvm_hal_maxwell_ce_decrypt_unsupported,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -149,11 +153,14 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
|
||||
.offset_out = uvm_hal_hopper_ce_offset_out,
|
||||
.offset_in_out = uvm_hal_hopper_ce_offset_in_out,
|
||||
.memcopy_copy_type = uvm_hal_hopper_ce_memcopy_copy_type,
|
||||
.memset_1 = uvm_hal_hopper_ce_memset_1,
|
||||
.memset_4 = uvm_hal_hopper_ce_memset_4,
|
||||
.memset_8 = uvm_hal_hopper_ce_memset_8,
|
||||
.memcopy_is_valid = uvm_hal_hopper_ce_memcopy_is_valid,
|
||||
.memset_is_valid = uvm_hal_hopper_ce_memset_is_valid,
|
||||
.encrypt = uvm_hal_hopper_ce_encrypt,
|
||||
.decrypt = uvm_hal_hopper_ce_decrypt,
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -366,11 +373,12 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
|
||||
.write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
|
||||
.get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
|
||||
.parse_entry = uvm_hal_maxwell_fault_buffer_parse_entry_unsupported,
|
||||
.parse_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported,
|
||||
.entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
|
||||
.entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
|
||||
.entry_size = uvm_hal_maxwell_fault_buffer_entry_size_unsupported,
|
||||
.parse_non_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported,
|
||||
.get_fault_type = uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -388,10 +396,11 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_put = uvm_hal_pascal_fault_buffer_read_put,
|
||||
.read_get = uvm_hal_pascal_fault_buffer_read_get,
|
||||
.write_get = uvm_hal_pascal_fault_buffer_write_get,
|
||||
.parse_entry = uvm_hal_pascal_fault_buffer_parse_entry,
|
||||
.parse_replayable_entry = uvm_hal_pascal_fault_buffer_parse_replayable_entry,
|
||||
.entry_is_valid = uvm_hal_pascal_fault_buffer_entry_is_valid,
|
||||
.entry_clear_valid = uvm_hal_pascal_fault_buffer_entry_clear_valid,
|
||||
.entry_size = uvm_hal_pascal_fault_buffer_entry_size,
|
||||
.get_fault_type = uvm_hal_pascal_fault_buffer_get_fault_type,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -402,8 +411,9 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_get = uvm_hal_volta_fault_buffer_read_get,
|
||||
.write_get = uvm_hal_volta_fault_buffer_write_get,
|
||||
.get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
|
||||
.parse_entry = uvm_hal_volta_fault_buffer_parse_entry,
|
||||
.parse_replayable_entry = uvm_hal_volta_fault_buffer_parse_replayable_entry,
|
||||
.parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
|
||||
.get_fault_type = uvm_hal_volta_fault_buffer_get_fault_type,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -495,6 +505,59 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t sec2_table[] =
|
||||
{
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
|
||||
.u.sec2_ops = {
|
||||
.init = uvm_hal_maxwell_sec2_init_noop,
|
||||
.decrypt = uvm_hal_maxwell_sec2_decrypt_unsupported,
|
||||
.semaphore_release = uvm_hal_maxwell_sec2_semaphore_release_unsupported,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
|
||||
.u.sec2_ops = {
|
||||
.init = uvm_hal_hopper_sec2_init,
|
||||
.semaphore_release = uvm_hal_hopper_sec2_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_hopper_sec2_semaphore_timestamp_unsupported,
|
||||
.decrypt = uvm_hal_hopper_sec2_decrypt,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
|
||||
{
|
||||
NvLength i;
|
||||
@@ -598,6 +661,15 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(sec2_table,
|
||||
ARRAY_SIZE(sec2_table),
|
||||
SEC2_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.sec2_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_parent(sec2_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -648,6 +720,14 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;
|
||||
|
||||
class_ops = ops_find_by_id(sec2_table, ARRAY_SIZE(sec2_table), gpu_info->gpuArch);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("SEC2 HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
parent_gpu->sec2_hal = &class_ops->u.sec2_ops;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -658,6 +738,9 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// TODO: Bug 200692962: Add support for access counters in vGPU
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
|
||||
parent_gpu->access_counters_supported = false;
|
||||
// Access counters are not supported in CC.
|
||||
else if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
|
||||
parent_gpu->access_counters_supported = false;
|
||||
}
|
||||
|
||||
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -711,7 +794,7 @@ uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
|
||||
// memory, including those from other processors like the CPU or peer GPUs,
|
||||
// must come through this GPU's L2. In all current architectures, MEMBAR_GPU
|
||||
// is sufficient to resolve ordering at the L2 level.
|
||||
if (is_local_vidmem && !gpu->parent->numa_info.enabled && !uvm_downgrade_force_membar_sys)
|
||||
if (is_local_vidmem && !uvm_gpu_is_coherent(gpu->parent) && !uvm_downgrade_force_membar_sys)
|
||||
return UVM_MEMBAR_GPU;
|
||||
|
||||
// If the mapped memory was remote, or if a coherence protocol can cache
|
||||
@@ -895,7 +978,7 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -40,6 +40,8 @@ typedef void (*uvm_hal_init_t)(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
|
||||
void uvm_hal_pascal_host_init(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_sec2_init_noop(uvm_push_t *push);
|
||||
void uvm_hal_hopper_sec2_init(uvm_push_t *push);
|
||||
|
||||
// Host method validation
|
||||
typedef bool (*uvm_hal_host_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
@@ -207,9 +209,11 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_sec2_semaphore_release_unsupported(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_volta_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_sec2_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
@@ -228,15 +232,30 @@ void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
void uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_hopper_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
|
||||
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
uvm_gpfifo_sync_t sync_flag);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
uvm_gpfifo_sync_t sync_flag);
|
||||
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
uvm_gpfifo_sync_t sync_flag);
|
||||
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
uvm_gpfifo_sync_t sync_flag);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
|
||||
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
|
||||
@@ -273,6 +292,10 @@ typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
|
||||
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
|
||||
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
|
||||
|
||||
typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
|
||||
// CE method validation
|
||||
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
@@ -309,10 +332,19 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s
|
||||
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
|
||||
// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size);
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size);
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size);
|
||||
|
||||
// Memset size bytes at dst to a given N-byte input value.
|
||||
//
|
||||
@@ -342,6 +374,54 @@ void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 va
|
||||
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
|
||||
// Encrypts the contents of the source buffer into the destination buffer, up to
|
||||
// the given size. The authentication tag of the encrypted contents is written
|
||||
// to auth_tag, so it can be verified later on by a decrypt operation.
|
||||
//
|
||||
// The addressing modes of the destination and authentication tag addresses
|
||||
// should match. If the addressing mode is physical, then the address apertures
|
||||
// should also match.
|
||||
typedef void (*uvm_hal_ce_encrypt_t)(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag);
|
||||
|
||||
// Decrypts the contents of the source buffer into the destination buffer, up to
|
||||
// the given size. The method also verifies the integrity of the encrypted
|
||||
// buffer by calculating its authentication tag, and comparing it with the one
|
||||
// provided as argument.
|
||||
//
|
||||
// The addressing modes of the source and authentication tag addresses should
|
||||
// match. If the addressing mode is physical, then the address apertures should
|
||||
// also match.
|
||||
typedef void (*uvm_hal_ce_decrypt_t)(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag);
|
||||
|
||||
void uvm_hal_maxwell_ce_encrypt_unsupported(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag);
|
||||
void uvm_hal_maxwell_ce_decrypt_unsupported(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag);
|
||||
void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag);
|
||||
void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag);
|
||||
|
||||
// Increments the semaphore by 1, or resets to 0 if the incremented value would
|
||||
// exceed the payload.
|
||||
//
|
||||
@@ -405,15 +485,29 @@ typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
|
||||
// Parse the entry on the given buffer index. This also clears the valid bit of
|
||||
// the entry in the buffer.
|
||||
typedef void (*uvm_hal_fault_buffer_parse_entry_t)(uvm_parent_gpu_t *gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
// Parse the replayable entry at the given buffer index. This also clears the
|
||||
// valid bit of the entry in the buffer.
|
||||
typedef NV_STATUS (*uvm_hal_fault_buffer_parse_replayable_entry_t)(uvm_parent_gpu_t *gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
NV_STATUS uvm_hal_volta_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_fault_buffer_replay_t)(uvm_push_t *push, uvm_fault_replay_type_t type);
|
||||
typedef uvm_fault_type_t (*uvm_hal_fault_buffer_get_fault_type_t)(const NvU32 *fault_entry);
|
||||
typedef void (*uvm_hal_fault_cancel_global_t)(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
|
||||
typedef void (*uvm_hal_fault_cancel_targeted_t)(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t instance_ptr,
|
||||
@@ -427,25 +521,24 @@ NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent
|
||||
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);
|
||||
|
||||
void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
|
||||
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
uvm_fault_type_t uvm_hal_pascal_fault_buffer_get_fault_type(const NvU32 *fault_entry);
|
||||
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);
|
||||
|
||||
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
@@ -586,6 +679,28 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
|
||||
// The source and destination addresses must be 16-byte aligned. Note that the
|
||||
// best performance is achieved with 256-byte alignment. The decrypt size must
|
||||
// be larger than 0, and a multiple of 4 bytes.
|
||||
//
|
||||
// The authentication tag address must also be 16-byte aligned.
|
||||
// The authentication tag buffer size is UVM_CONF_COMPUTING_AUTH_TAG_SIZE bytes
|
||||
// defined in uvm_conf_computing.h.
|
||||
//
|
||||
// Decrypts the src buffer into the dst buffer of the given size.
|
||||
// The method also verifies integrity of the src buffer by calculating its
|
||||
// authentication tag and comparing it with the provided one.
|
||||
//
|
||||
// Note: SEC2 does not support encryption.
|
||||
typedef void (*uvm_hal_sec2_decrypt_t)(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va);
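As a quick reference, a hedged sketch of the alignment rules spelled out in the comment above. The helper itself is illustrative and not part of the driver; IS_ALIGNED is the kernel macro already used throughout this code.

```c
// Illustrative check of the documented constraints: 16-byte aligned source,
// destination and authentication tag, and a non-zero size that is a multiple
// of 4 bytes.
static bool sec2_decrypt_args_look_valid(NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va)
{
    return size > 0 &&
           IS_ALIGNED(size, 4) &&
           IS_ALIGNED(dst_va, 16) &&
           IS_ALIGNED(src_va, 16) &&
           IS_ALIGNED(auth_tag_va, 16);
}
```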
|
||||
|
||||
void uvm_hal_maxwell_sec2_decrypt_unsupported(uvm_push_t *push,
|
||||
NvU64 dst_va,
|
||||
NvU64 src_va,
|
||||
NvU32 size,
|
||||
NvU64 auth_tag_va);
|
||||
void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va);
|
||||
|
||||
struct uvm_host_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
@@ -629,6 +744,7 @@ struct uvm_ce_hal_struct
|
||||
uvm_hal_ce_offset_in_out_t offset_in_out;
|
||||
uvm_hal_ce_phys_mode_t phys_mode;
|
||||
uvm_hal_ce_plc_mode_t plc_mode;
|
||||
uvm_hal_ce_memcopy_type_t memcopy_copy_type;
|
||||
uvm_hal_ce_memcopy_is_valid memcopy_is_valid;
|
||||
uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
|
||||
uvm_hal_memcopy_t memcopy;
|
||||
@@ -639,6 +755,8 @@ struct uvm_ce_hal_struct
|
||||
uvm_hal_memset_8_t memset_8;
|
||||
uvm_hal_memset_v_4_t memset_v_4;
|
||||
uvm_hal_semaphore_reduction_inc_t semaphore_reduction_inc;
|
||||
uvm_hal_ce_encrypt_t encrypt;
|
||||
uvm_hal_ce_decrypt_t decrypt;
|
||||
};
|
||||
|
||||
struct uvm_arch_hal_struct
|
||||
@@ -660,11 +778,12 @@ struct uvm_fault_buffer_hal_struct
|
||||
uvm_hal_fault_buffer_read_get_t read_get;
|
||||
uvm_hal_fault_buffer_write_get_t write_get;
|
||||
uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
|
||||
uvm_hal_fault_buffer_parse_entry_t parse_entry;
|
||||
uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
|
||||
uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
|
||||
uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
|
||||
uvm_hal_fault_buffer_entry_size_t entry_size;
|
||||
uvm_hal_fault_buffer_parse_non_replayable_entry_t parse_non_replayable_entry;
|
||||
uvm_hal_fault_buffer_get_fault_type_t get_fault_type;
|
||||
};
|
||||
|
||||
struct uvm_access_counter_buffer_hal_struct
|
||||
@@ -678,6 +797,14 @@ struct uvm_access_counter_buffer_hal_struct
|
||||
uvm_hal_access_counter_buffer_entry_size_t entry_size;
|
||||
};
|
||||
|
||||
struct uvm_sec2_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
uvm_hal_sec2_decrypt_t decrypt;
|
||||
uvm_hal_semaphore_release_t semaphore_release;
|
||||
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// id is either a hardware class or GPU architecture
|
||||
@@ -700,6 +827,8 @@ typedef struct
|
||||
// access_counter_buffer_ops: id is an architecture
|
||||
uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
|
||||
|
||||
// sec2_ops: id is an architecture
|
||||
uvm_sec2_hal_t sec2_ops;
|
||||
} u;
|
||||
} uvm_hal_class_ops_t;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -111,6 +111,11 @@ typedef struct
|
||||
|
||||
// Whether the address is virtual
|
||||
bool is_virtual;
|
||||
|
||||
// Whether the address resides in a non-protected memory region when the
|
||||
// Confidential Computing feature is enabled. Default is protected.
|
||||
// Ignored if the feature is disabled and should not be used.
|
||||
bool is_unprotected;
|
||||
} uvm_gpu_address_t;
|
||||
|
||||
// Create a virtual GPU address
|
||||
@@ -123,6 +128,13 @@ static uvm_gpu_address_t uvm_gpu_address_virtual(NvU64 va)
|
||||
return address;
|
||||
}
|
||||
|
||||
static uvm_gpu_address_t uvm_gpu_address_virtual_unprotected(NvU64 va)
|
||||
{
|
||||
uvm_gpu_address_t address = uvm_gpu_address_virtual(va);
|
||||
address.is_unprotected = true;
|
||||
return address;
|
||||
}
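A hedged usage sketch of the new constructor. The wrapper and its conf_computing_enabled flag are assumptions for illustration; only the two uvm_gpu_address_virtual* constructors come from the code above.

```c
// Pick protected or unprotected virtual addressing for a staging buffer,
// depending on whether the Confidential Computing feature is enabled.
static uvm_gpu_address_t staging_buffer_address(NvU64 va, bool conf_computing_enabled)
{
    if (conf_computing_enabled)
        return uvm_gpu_address_virtual_unprotected(va);

    return uvm_gpu_address_virtual(va);
}
```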
|
||||
|
||||
// Create a physical GPU address
|
||||
static uvm_gpu_address_t uvm_gpu_address_physical(uvm_aperture_t aperture, NvU64 pa)
|
||||
{
|
||||
@@ -258,8 +270,8 @@ typedef enum
|
||||
UVM_FAULT_CANCEL_VA_MODE_COUNT,
|
||||
} uvm_fault_cancel_va_mode_t;
|
||||
|
||||
// Types of faults that can show up in the fault buffer. Non-UVM related faults are grouped in FATAL category
|
||||
// since we don't care about the specific type
|
||||
// Types of faults that can show up in the fault buffer. Non-UVM related faults
|
||||
// are grouped in FATAL category since we don't care about the specific type.
|
||||
typedef enum
|
||||
{
|
||||
UVM_FAULT_TYPE_INVALID_PDE = 0,
|
||||
@@ -272,7 +284,8 @@ typedef enum
|
||||
// READ to WRITE-ONLY (ATS)
|
||||
UVM_FAULT_TYPE_READ,
|
||||
|
||||
// The next values are considered fatal and are not handled by the UVM driver
|
||||
// The next values are considered fatal and are not handled by the UVM
|
||||
// driver
|
||||
UVM_FAULT_TYPE_FATAL,
|
||||
|
||||
// Values required for tools
|
||||
@@ -311,10 +324,24 @@ typedef enum
|
||||
UVM_MMU_ENGINE_TYPE_COUNT,
|
||||
} uvm_mmu_engine_type_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Allow entry to be fetched before the previous entry finishes ESCHED
|
||||
// execution.
|
||||
UVM_GPFIFO_SYNC_PROCEED = 0,
|
||||
|
||||
// Fetch of this entry has to wait until the previous entry has finished
|
||||
// executing by ESCHED.
|
||||
// For a complete engine sync the previous entry needs to include
|
||||
// WAIT_FOR_IDLE command or other engine synchronization.
|
||||
UVM_GPFIFO_SYNC_WAIT,
|
||||
} uvm_gpfifo_sync_t;
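A hedged sketch of how the new sync flag is meant to be used with the Hopper GPFIFO entry helper updated later in this change. The wrapper and its pushbuffer arguments are made up for illustration.

```c
// Build two GPFIFO entries: the first may be fetched eagerly, the second is
// not fetched until the first has finished ESCHED execution.
static void fill_gpfifo_pair(NvU64 *entries,
                             NvU64 pb_va_first, NvU32 pb_len_first,
                             NvU64 pb_va_second, NvU32 pb_len_second)
{
    uvm_hal_hopper_host_set_gpfifo_entry(&entries[0], pb_va_first, pb_len_first, UVM_GPFIFO_SYNC_PROCEED);
    uvm_hal_hopper_host_set_gpfifo_entry(&entries[1], pb_va_second, pb_len_second, UVM_GPFIFO_SYNC_WAIT);
}
```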
|
||||
|
||||
const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type);
|
||||
|
||||
// HW unit that triggered the fault. We include the fields required for fault cancelling. Including more information
|
||||
// might be useful for performance heuristics in the future
|
||||
// HW unit that triggered the fault. We include the fields required for fault
|
||||
// cancelling. Including more information might be useful for performance
|
||||
// heuristics in the future.
|
||||
typedef struct
|
||||
{
|
||||
uvm_fault_client_type_t client_type : order_base_2(UVM_FAULT_CLIENT_TYPE_COUNT) + 1;
|
||||
@@ -429,7 +456,8 @@ typedef enum
|
||||
// Completes when all fault replays are in-flight
|
||||
UVM_FAULT_REPLAY_TYPE_START = 0,
|
||||
|
||||
// Completes when all faulting accesses have been correctly translated or faulted again
|
||||
// Completes when all faulting accesses have been correctly translated or
|
||||
// faulted again
|
||||
UVM_FAULT_REPLAY_TYPE_START_ACK_ALL,
|
||||
|
||||
UVM_FAULT_REPLAY_TYPE_MAX
|
||||
@@ -467,18 +495,18 @@ struct uvm_access_counter_buffer_entry_struct
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Instance pointer of one of the channels in the TSG that triggered the
|
||||
// notification
|
||||
// Instance pointer of one of the channels in the TSG that triggered
|
||||
// the notification.
|
||||
uvm_gpu_phys_address_t instance_ptr;
|
||||
|
||||
uvm_mmu_engine_type_t mmu_engine_type;
|
||||
|
||||
NvU32 mmu_engine_id;
|
||||
|
||||
// Identifier of the subcontext that performed the memory accesses that
|
||||
// triggered the notification. This value, combined with the instance_ptr,
|
||||
// is needed to obtain the GPU VA space of the process that triggered the
|
||||
// notification.
|
||||
// Identifier of the subcontext that performed the memory accesses
|
||||
// that triggered the notification. This value, combined with the
|
||||
// instance_ptr, is needed to obtain the GPU VA space of the process
|
||||
// that triggered the notification.
|
||||
NvU32 ve_id;
|
||||
|
||||
// VA space for the address that triggered the notification
|
||||
@@ -524,8 +552,8 @@ static uvm_prot_t uvm_fault_access_type_to_prot(uvm_fault_access_type_t access_t
|
||||
return UVM_PROT_READ_WRITE;
|
||||
|
||||
default:
|
||||
// Prefetch faults, if not ignored, are handled like read faults and require
|
||||
// a mapping with, at least, READ_ONLY access permission
|
||||
// Prefetch faults, if not ignored, are handled like read faults and
|
||||
// require a mapping with, at least, READ_ONLY access permission.
|
||||
return UVM_PROT_READ_ONLY;
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
@@ -49,9 +49,7 @@ typedef struct
|
||||
bool uvm_hmm_is_enabled_system_wide(void);
|
||||
|
||||
// Initialize HMM for the given va_space.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
|
||||
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
|
||||
|
||||
// Destroy any HMM state for the given va_space.
|
||||
// Locking: va_space lock must be held in write mode.
|
||||
@@ -84,58 +82,58 @@ typedef struct
|
||||
NvU64 addr,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find an existing HMM va_block when processing a CPU fault and try to
|
||||
// isolate and lock the faulting page.
|
||||
// Return NV_ERR_INVALID_ADDRESS if the block is not found,
|
||||
// NV_ERR_BUSY_RETRY if the page could not be locked, and
|
||||
// NV_OK if the block is found and the page is locked. Also,
|
||||
// uvm_hmm_cpu_fault_finish() must be called if NV_OK is returned.
|
||||
// Locking: This must be called with the vma->vm_mm locked and the va_space
|
||||
// read locked.
|
||||
NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
|
||||
uvm_service_block_context_t *service_context,
|
||||
struct vm_fault *vmf,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// This must be called after uvm_va_block_cpu_fault() if
|
||||
// uvm_hmm_va_block_cpu_find() returns NV_OK.
|
||||
// Locking: This must be called with the vma->vm_mm locked and the va_space
|
||||
// read locked.
|
||||
void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context);
|
||||
|
||||
// Find or create a new HMM va_block.
|
||||
//
|
||||
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
|
||||
// address 'addr' or the VMA does not have at least PROT_READ permission.
|
||||
// The caller is also responsible for checking that there is no UVM
|
||||
// va_range covering the given address before calling this function.
|
||||
// If va_block_context is not NULL, the VMA is cached in
|
||||
// va_block_context->hmm.vma.
|
||||
// The VMA is returned in vma_out if it's not NULL.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma_out,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find the VMA for the given address and set va_block_context->hmm.vma.
|
||||
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
|
||||
// is no VMA associated with the address 'addr' or the VMA does not have at
|
||||
// least PROT_READ permission.
|
||||
// Find the VMA for the given address and return it in vma_out. Return
|
||||
// NV_ERR_INVALID_ADDRESS if mm is NULL or there is no VMA associated with
|
||||
// the address 'addr' or the VMA does not have at least PROT_READ
|
||||
// permission.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read or mm equal to NULL.
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
|
||||
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr);
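A hedged call sketch for the new signature. The wrapper is illustrative; the locking rules are the ones stated in the comment above.

```c
// mm must be retained and locked for at least read, or be NULL, in which
// case the lookup fails with NV_ERR_INVALID_ADDRESS.
static NV_STATUS find_readable_vma_example(struct mm_struct *mm, NvU64 addr)
{
    struct vm_area_struct *vma;
    NV_STATUS status = uvm_hmm_find_vma(mm, &vma, addr);

    if (status != NV_OK)
        return status;

    // On success, vma covers addr with at least PROT_READ permission.
    return NV_OK;
}
```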
|
||||
|
||||
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
|
||||
// not NULL and covers the given region. This always returns true and is
|
||||
// intended to only be used with UVM_ASSERT().
|
||||
// If va_block is a HMM va_block, check that vma is not NULL and covers the
|
||||
// given region. This always returns true and is intended to only be used
|
||||
// with UVM_ASSERT().
|
||||
// Locking: This function must be called with the va_block lock held and if
|
||||
// va_block is a HMM block, va_block_context->mm must be retained and
|
||||
// locked for at least read.
|
||||
// va_block is a HMM block, va_space->va_space_mm.mm->mmap_lock must be
|
||||
// retained and locked for at least read.
|
||||
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Initialize the HMM portion of the service_context.
|
||||
// This should be called one time before any retry loops calling
|
||||
// uvm_va_block_service_locked().
|
||||
void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context);
|
||||
|
||||
// Begin a migration critical section. When calling into the kernel it is
|
||||
// sometimes necessary to drop the va_block lock. This function returns
|
||||
// NV_OK when no other thread has started a migration critical section.
|
||||
// Otherwise, it returns NV_ERR_BUSY_RETRY and threads should then retry
|
||||
// this function to begin a critical section.
|
||||
// Locking: va_block lock must not be held.
|
||||
NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block);
|
||||
|
||||
// Same as uvm_hmm_migrate_begin() but waits if required before beginning a
|
||||
// critical section.
|
||||
void uvm_hmm_migrate_begin_wait(uvm_va_block_t *va_block);
|
||||
|
||||
// Finish a migration critical section.
|
||||
void uvm_hmm_migrate_finish(uvm_va_block_t *va_block);
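A hedged sketch of the critical-section pattern described above, using the waiting variant of the three functions just declared; the migration work itself is elided.

```c
// Illustrative only: enter the migration critical section, do the work with
// the va_block lock taken as needed, then leave the critical section.
static void migrate_in_critical_section(uvm_va_block_t *va_block)
{
    uvm_hmm_migrate_begin_wait(va_block);

    // ... perform the migration ...

    uvm_hmm_migrate_finish(va_block);
}
```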
|
||||
|
||||
// Find or create a HMM va_block and mark it so the next va_block split
|
||||
// will fail for testing purposes.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
@@ -224,31 +222,29 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// This function assigns va_block_context->policy to the policy covering
|
||||
// the given address 'addr' and assigns the ending address '*endp' to the
|
||||
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
|
||||
// ending address of the policy range. Note that va_block_context->hmm.vma
|
||||
// is expected to be initialized before calling this function.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
|
||||
// the va_block lock held.
|
||||
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
// This function returns the policy covering the given address 'addr' and
|
||||
// assigns the ending address '*endp' to the minimum of va_block->end,
|
||||
// vma->vm_end - 1, and the ending address of the policy range. Locking:
|
||||
// This function must be called with vma->vm_mm retained and locked for at
|
||||
// least read and the va_block and va_space lock held.
|
||||
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
|
||||
// This function finds the VMA for the page index 'page_index' and assigns
|
||||
// it to va_block_context->vma, sets va_block_context->policy to the policy
|
||||
// covering the given address, and sets the ending page range '*outerp'
|
||||
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
|
||||
// ending address of the policy range, and va_block->end.
|
||||
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
|
||||
// the va_block lock held.
|
||||
// This function finds the VMA for the page index 'page_index' and returns
|
||||
// it in vma_out which must not be NULL. Returns the policy covering the
|
||||
// given address, and sets the ending page range '*outerp' to the minimum of
|
||||
// *outerp, vma->vm_end - 1, the ending address of the policy range, and
|
||||
// va_block->end.
|
||||
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise sets *vma
|
||||
// and returns NV_OK.
|
||||
// Locking: This function must be called with mm retained and locked for at
|
||||
// least read and the va_block and va_space lock held.
|
||||
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_page_index_t page_index,
|
||||
const uvm_va_policy_t **policy,
|
||||
uvm_page_index_t *outerp);
|
||||
|
||||
// Clear thrashing policy information from all HMM va_blocks.
|
||||
@@ -257,24 +253,21 @@ typedef struct
|
||||
|
||||
// Return the expanded region around 'address' limited to the intersection
|
||||
// of va_block start/end, vma start/end, and policy start/end.
|
||||
// va_block_context must not be NULL, va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
|
||||
// va_block_context->policy must be valid.
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
|
||||
// va_space lock must be held in at least read mode, and the va_block lock
|
||||
// held.
|
||||
// Locking: the caller must hold va_space->va_space_mm.mm->mmap_lock in at
|
||||
// least read mode, the va_space lock must be held in at least read mode,
|
||||
// and the va_block lock held.
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
const uvm_va_policy_t *policy,
|
||||
NvU64 address);
|
||||
|
||||
// Return the logical protection allowed of a HMM va_block for the page at
|
||||
// the given address.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()).
|
||||
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
|
||||
// read mode.
|
||||
// the given address within the vma which must be valid. This is usually
|
||||
// obtained from uvm_hmm_va_block_find_create()).
|
||||
// Locking: the caller must hold va_space->va_space_mm.mm mmap_lock in at
|
||||
// least read mode.
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 addr);
|
||||
|
||||
// This is called to service a GPU fault.
|
||||
@@ -287,9 +280,9 @@ typedef struct
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// This is called to migrate a region within a HMM va_block.
|
||||
// va_block_context must not be NULL and va_block_context->policy and
|
||||
// va_block_context->hmm.vma must be valid.
|
||||
// Locking: the va_block_context->mm must be retained, mmap_lock must be
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma
|
||||
// must be valid.
|
||||
// Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
|
||||
// locked, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
@@ -302,7 +295,7 @@ typedef struct
|
||||
// UvmMigrate().
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->policy or va_block_context->hmm.vma.
|
||||
// va_block_context->hmm.vma.
|
||||
//
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
||||
// the va_space read lock must be held.
|
||||
@@ -314,6 +307,11 @@ typedef struct
|
||||
uvm_migrate_mode_t mode,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Evicts all va_blocks in the va_space to the CPU. Unlike the
|
||||
// other va_block eviction functions this is based on virtual
|
||||
// address and therefore takes mmap_lock for read.
|
||||
void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space);
|
||||
|
||||
// This sets the va_block_context->hmm.src_pfns[] to the ZONE_DEVICE private
|
||||
// PFN for the GPU chunk memory.
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
@@ -345,13 +343,13 @@ typedef struct
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Migrate a GPU chunk to system memory. This called to remove CPU page
|
||||
// table references to device private struct pages for the given GPU after
|
||||
// all other references in va_blocks have been released and the GPU is
|
||||
// in the process of being removed/torn down. Note that there is no mm,
|
||||
// VMA, va_block or any user channel activity on this GPU.
|
||||
NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
|
||||
uvm_gpu_chunk_t *gpu_chunk);
|
||||
// Migrate a GPU device-private page to system memory. This is
|
||||
// called to remove CPU page table references to device private
|
||||
// struct pages for the given GPU after all other references in
|
||||
// va_blocks have been released and the GPU is in the process of
|
||||
// being removed/torn down. Note that there is no mm, VMA,
|
||||
// va_block or any user channel activity on this GPU.
|
||||
NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn);
|
||||
|
||||
// This returns what would be the intersection of va_block start/end and
|
||||
// VMA start/end-1 for the given 'lookup_address' if
|
||||
@@ -406,9 +404,8 @@ typedef struct
|
||||
return false;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
|
||||
static void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
@@ -432,38 +429,43 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
|
||||
uvm_service_block_context_t *service_context,
|
||||
struct vm_fault *vmf,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
static NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma, NvU64 addr)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_hmm_migrate_begin_wait(uvm_va_block_t *va_block)
|
||||
{
|
||||
}
|
||||
|
||||
static void uvm_hmm_migrate_finish(uvm_va_block_t *va_block)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
@@ -522,16 +524,19 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
unsigned long addr,
|
||||
NvU64 *endp)
|
||||
static const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr,
|
||||
NvU64 *endp)
|
||||
{
|
||||
UVM_ASSERT(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_page_index_t page_index,
|
||||
const uvm_va_policy_t **policy,
|
||||
uvm_page_index_t *outerp)
|
||||
{
|
||||
return NV_OK;
|
||||
@@ -543,14 +548,15 @@ typedef struct
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
const uvm_va_policy_t *policy,
|
||||
NvU64 address)
|
||||
{
|
||||
return (uvm_va_block_region_t){};
|
||||
}
|
||||
|
||||
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 addr)
|
||||
{
|
||||
return UVM_PROT_NONE;
|
||||
@@ -586,6 +592,10 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_gpu_chunk_t *gpu_chunk,
|
||||
@@ -612,8 +622,7 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
|
||||
uvm_gpu_chunk_t *gpu_chunk)
|
||||
static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -49,13 +49,23 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// A single top level PDE on Hopper covers 64 PB and that's the minimum
|
||||
// size that can be used.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 64ull * 1024 * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (8 * UVM_SIZE_1TB);
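The rewritten constants are numerically identical to the literals they replace. As a hedged compile-time check that could sit inside this function (assuming UVM_SIZE_1TB and UVM_SIZE_1PB are the usual power-of-two byte counts):

```c
// Equivalent spellings: 64ull * 1024^5 bytes is 64 PB, 384ull * 1024^4 bytes is 384 TB.
BUILD_BUG_ON(64ull * 1024 * 1024 * 1024 * 1024 * 1024 != 64 * UVM_SIZE_1PB);
BUILD_BUG_ON(384ull * 1024 * 1024 * 1024 * 1024 != 384 * UVM_SIZE_1TB);
```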
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
// Physical CE writes to vidmem are non-coherent with respect to the CPU on
|
||||
// GH180.
|
||||
parent_gpu->ce_phys_vidmem_write_supported = !uvm_gpu_is_coherent(parent_gpu);
|
||||
|
||||
// TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
|
||||
// portion for the uvm tests became nonresponsive after
|
||||
// some time and then failed even after reboot
|
||||
parent_gpu->peer_copy_mode = uvm_gpu_is_coherent(parent_gpu) ?
|
||||
UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;
|
||||
|
||||
// All GR context buffers may be mapped to 57b wide VAs. All "compute" units
|
||||
// accessing GR context buffers support the 57-bit VA range.
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_conf_computing.h"
|
||||
#include "clc8b5.h"
|
||||
|
||||
static NvU32 ce_aperture(uvm_aperture_t aperture)
|
||||
@@ -97,7 +98,8 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_PAYLOAD_SIZE, ONE_WORD) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_SEMAPHORE_NO_TIMESTAMP) |
|
||||
launch_dma_plc_mode);
|
||||
}
|
||||
|
||||
@@ -114,7 +116,8 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_PAYLOAD_SIZE, ONE_WORD) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_SEMAPHORE_NO_TIMESTAMP) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_SIGN, UNSIGNED) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_ENABLE, TRUE) |
|
||||
@@ -135,7 +138,8 @@ void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_PAYLOAD_SIZE, ONE_WORD) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_SEMAPHORE_WITH_TIMESTAMP) |
|
||||
launch_dma_plc_mode);
|
||||
}
|
||||
|
||||
@@ -148,12 +152,46 @@ static NvU32 hopper_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t ds
|
||||
return HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
|
||||
}
|
||||
|
||||
static bool hopper_scrub_enable(uvm_gpu_address_t dst, size_t size)
|
||||
static bool va_is_flat_vidmem(uvm_gpu_t *gpu, NvU64 va)
|
||||
{
|
||||
return !dst.is_virtual &&
|
||||
dst.aperture == UVM_APERTURE_VID &&
|
||||
IS_ALIGNED(dst.address, UVM_PAGE_SIZE_4K) &&
|
||||
IS_ALIGNED(size, UVM_PAGE_SIZE_4K);
|
||||
return (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu)) &&
|
||||
va >= gpu->parent->flat_vidmem_va_base &&
|
||||
va < gpu->parent->flat_vidmem_va_base + UVM_GPU_MAX_PHYS_MEM;
|
||||
}
|
||||
|
||||
// Return whether a memset should use the fast scrubber. If so, convert dst to
|
||||
// the address needed by the fast scrubber.
|
||||
static bool hopper_scrub_enable(uvm_gpu_t *gpu, uvm_gpu_address_t *dst, size_t size)
|
||||
{
|
||||
if (!IS_ALIGNED(dst->address, UVM_PAGE_SIZE_4K) || !IS_ALIGNED(size, UVM_PAGE_SIZE_4K))
|
||||
return false;
|
||||
|
||||
// When CE physical writes are disallowed, higher layers will convert
|
||||
// physical memsets to virtual using the flat mapping. Those layers are
|
||||
// unaware of the fast scrubber, which is safe to use specifically when CE
|
||||
// physical access is disallowed. Detect such memsets within the flat vidmem
|
||||
// region and convert them back to physical, since the fast scrubber only
|
||||
// works with physical addressing.
|
||||
if (dst->is_virtual && !gpu->parent->ce_phys_vidmem_write_supported && va_is_flat_vidmem(gpu, dst->address)) {
|
||||
*dst = uvm_gpu_address_physical(UVM_APERTURE_VID, dst->address - gpu->parent->flat_vidmem_va_base);
|
||||
return true;
|
||||
}
|
||||
|
||||
return !dst->is_virtual && dst->aperture == UVM_APERTURE_VID;
|
||||
}
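A hedged illustration of the new calling convention: because hopper_scrub_enable() may rewrite the destination from a flat-vidmem virtual address back to a physical one, callers that only want the yes/no answer should pass a scratch copy. The wrapper below is made up; uvm_hal_hopper_ce_memset_is_valid() further down does exactly this with a temporary.

```c
static bool memset_would_use_fast_scrubber(uvm_gpu_t *gpu, uvm_gpu_address_t dst, size_t size)
{
    // Pass a copy so the caller's address is not converted as a side effect.
    uvm_gpu_address_t dst_copy = dst;

    return hopper_scrub_enable(gpu, &dst_copy, size);
}
```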
|
||||
|
||||
static NvU32 hopper_memset_copy_type(uvm_push_t *push, uvm_gpu_address_t dst)
|
||||
{
|
||||
if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected)
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected && src.is_unprotected)
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
|
||||
}
|
||||
|
||||
static void hopper_memset_common(uvm_push_t *push,
|
||||
@@ -172,8 +210,10 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
NvU32 launch_dma_remap_enable;
|
||||
NvU32 launch_dma_scrub_enable;
|
||||
NvU32 flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
NvU32 copy_type_value = hopper_memset_copy_type(push, dst);
|
||||
bool is_scrub = hopper_scrub_enable(gpu, &dst, num_elements * memset_element_size);
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, memset_element_size),
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, num_elements, memset_element_size),
|
||||
"Memset validation failed in channel %s, GPU %s",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
@@ -186,7 +226,7 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
else
|
||||
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
|
||||
|
||||
if (memset_element_size == 8 && hopper_scrub_enable(dst, num_elements * memset_element_size)) {
|
||||
if (memset_element_size == 8 && is_scrub) {
|
||||
launch_dma_remap_enable = HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, FALSE);
|
||||
launch_dma_scrub_enable = HWCONST(C8B5, LAUNCH_DMA, MEMORY_SCRUB_ENABLE, TRUE);
|
||||
|
||||
@@ -223,6 +263,7 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
launch_dma_scrub_enable |
|
||||
launch_dma_dst_type |
|
||||
launch_dma_plc_mode |
|
||||
copy_type_value |
|
||||
pipelined_value);
|
||||
|
||||
dst.address += memset_this_time * memset_element_size;
|
||||
@@ -250,7 +291,7 @@ void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 v
|
||||
|
||||
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size)
|
||||
{
|
||||
if (hopper_scrub_enable(dst, size)) {
|
||||
if (hopper_scrub_enable(uvm_push_get_gpu(push), &dst, size)) {
|
||||
NvU64 value64 = value;
|
||||
|
||||
value64 |= value64 << 8;
|
||||
@@ -274,7 +315,7 @@ void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 v
|
||||
{
|
||||
UVM_ASSERT_MSG(size % 4 == 0, "size: %zd\n", size);
|
||||
|
||||
if (hopper_scrub_enable(dst, size)) {
|
||||
if (hopper_scrub_enable(uvm_push_get_gpu(push), &dst, size)) {
|
||||
NvU64 value64 = value;
|
||||
|
||||
value64 |= value64 << 32;
|
||||
@@ -294,15 +335,233 @@ void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 v
|
||||
hopper_memset_common(push, dst, size, 4);
|
||||
}
|
||||
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
// In HCC, if a memset uses physical addressing for the destination, then
|
||||
// it must write to (protected) vidmem. If the memset uses virtual
|
||||
// addressing, and the backing storage is not vidmem, the access is only
|
||||
// legal if the copy type is NONPROT2NONPROT, and the destination is
|
||||
// unprotected sysmem, but the validation does not detect it.
|
||||
if (uvm_conf_computing_mode_is_hcc(gpu) && !dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
|
||||
return false;
|
||||
|
||||
if (!gpu->parent->ce_phys_vidmem_write_supported) {
|
||||
size_t size = num_elements * element_size;
|
||||
uvm_gpu_address_t temp = dst;
|
||||
|
||||
// Physical vidmem writes are disallowed, unless using the scrubber
|
||||
if (!dst.is_virtual && dst.aperture == UVM_APERTURE_VID && !hopper_scrub_enable(gpu, &temp, size)) {
|
||||
UVM_ERR_PRINT("Destination address of vidmem memset must be virtual, not physical: {%s, 0x%llx} size %zu\n",
|
||||
uvm_gpu_address_aperture_string(dst),
|
||||
dst.address,
|
||||
size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (uvm_conf_computing_mode_is_hcc(gpu)) {
|
||||
// In HCC, if a memcopy uses physical addressing for either the
|
||||
// destination or the source, then the corresponding aperture must be
|
||||
// vidmem. If virtual addressing is used, and the backing storage is
|
||||
// sysmem, the access is only legal if the copy type is NONPROT2NONPROT,
// but the validation does not detect it. In other words, the copy
// source and destination are unprotected sysmem.
|
||||
if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
|
||||
return false;
|
||||
|
||||
if (!dst.is_virtual && (dst.aperture != UVM_APERTURE_VID))
|
||||
return false;
|
||||
|
||||
if (dst.is_unprotected != src.is_unprotected)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!gpu->parent->ce_phys_vidmem_write_supported && !dst.is_virtual && dst.aperture == UVM_APERTURE_VID) {
|
||||
UVM_ERR_PRINT("Destination address of vidmem memcopy must be virtual, not physical: {%s, 0x%llx}\n",
|
||||
uvm_gpu_address_aperture_string(dst),
|
||||
dst.address);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Specialized version of uvm_hal_volta_ce_memcopy used for encryption and
|
||||
// decryption. Pre-Hopper functionality, such as validation or address patching,
|
||||
// has been removed.
|
||||
static void encrypt_or_decrypt(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, NvU32 size)
|
||||
{
|
||||
NvU32 pipelined_value;
|
||||
NvU32 launch_dma_src_dst_type;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
NvU32 flush_value;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
// HW allows unaligned operations only if the entire buffer is in one 32B
|
||||
// sector. Operations on buffers larger than 32B have to be aligned.
|
||||
if (size > UVM_CONF_COMPUTING_BUF_ALIGNMENT) {
|
||||
UVM_ASSERT(IS_ALIGNED(src.address, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
|
||||
UVM_ASSERT(IS_ALIGNED(dst.address, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT((dst.address >> UVM_CONF_COMPUTING_BUF_ALIGNMENT) ==
|
||||
((dst.address + size - 1) >> UVM_CONF_COMPUTING_BUF_ALIGNMENT));
|
||||
UVM_ASSERT((src.address >> UVM_CONF_COMPUTING_BUF_ALIGNMENT) ==
|
||||
((src.address + size - 1) >> UVM_CONF_COMPUTING_BUF_ALIGNMENT));
|
||||
}
|
||||
|
||||
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
|
||||
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
else
|
||||
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
|
||||
|
||||
flush_value = hopper_get_flush_value(push);
|
||||
|
||||
gpu->parent->ce_hal->offset_in_out(push, src.address, dst.address);
|
||||
|
||||
NV_PUSH_1U(C8B5, LINE_LENGTH_IN, size);
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, SECURE) |
|
||||
flush_value |
|
||||
launch_dma_src_dst_type |
|
||||
launch_dma_plc_mode |
|
||||
pipelined_value);
|
||||
}
|
||||
|
||||
// The GPU CE encrypt operation requires clients to pass a valid
|
||||
// address where the used IV will be written. But this requirement is
|
||||
// unnecessary, because UVM should instead rely on the CSL
|
||||
// nvUvmInterfaceCslLogDeviceEncryption API to independently track
|
||||
// the expected IV.
|
||||
//
|
||||
// To satisfy the HW requirement the same unprotected sysmem address is
|
||||
// passed to all GPU-side encryptions. This dummy buffer is allocated at
|
||||
// GPU initialization time.
|
||||
static NvU64 encrypt_iv_address(uvm_push_t *push, uvm_gpu_address_t dst)
|
||||
{
|
||||
NvU64 iv_address;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
// Match addressing mode of destination and IV
|
||||
if (dst.is_virtual) {
|
||||
iv_address = uvm_rm_mem_get_gpu_va(gpu->conf_computing.iv_rm_mem, gpu, false).address;
|
||||
}
|
||||
else {
|
||||
iv_address = uvm_mem_gpu_physical(gpu->conf_computing.iv_mem,
|
||||
gpu,
|
||||
0,
|
||||
gpu->conf_computing.iv_mem->size).address;
|
||||
}
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(iv_address, UVM_CONF_COMPUTING_IV_ALIGNMENT));
|
||||
|
||||
return iv_address;
|
||||
}
|
||||
|
||||
// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
|
||||
void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag)
|
||||
{
|
||||
|
||||
NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
|
||||
NvU64 iv_address;
|
||||
NvU32 iv_address_hi32, iv_address_lo32;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
|
||||
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
|
||||
|
||||
if (!src.is_virtual)
|
||||
UVM_ASSERT(src.aperture == UVM_APERTURE_VID);
|
||||
|
||||
// The addressing mode (and aperture, if applicable) of the destination
|
||||
// pointer determines the addressing mode and aperture used by the
|
||||
// encryption to reference the other two addresses written by it:
|
||||
// authentication tag, and IV. If the client passes a sysmem physical
|
||||
// address as destination, then the authentication tag must also be a sysmem
|
||||
// physical address.
|
||||
UVM_ASSERT(dst.is_virtual == auth_tag.is_virtual);
|
||||
|
||||
if (!dst.is_virtual) {
|
||||
UVM_ASSERT(dst.aperture == UVM_APERTURE_SYS);
|
||||
UVM_ASSERT(auth_tag.aperture == UVM_APERTURE_SYS);
|
||||
}
|
||||
|
||||
NV_PUSH_1U(C8B5, SET_SECURE_COPY_MODE, HWCONST(C8B5, SET_SECURE_COPY_MODE, MODE, ENCRYPT));
|
||||
|
||||
auth_tag_address_hi32 = HWVALUE(C8B5, SET_ENCRYPT_AUTH_TAG_ADDR_UPPER, UPPER, NvU64_HI32(auth_tag.address));
|
||||
auth_tag_address_lo32 = HWVALUE(C8B5, SET_ENCRYPT_AUTH_TAG_ADDR_LOWER, LOWER, NvU64_LO32(auth_tag.address));
|
||||
|
||||
iv_address = encrypt_iv_address(push, dst);
|
||||
|
||||
iv_address_hi32 = HWVALUE(C8B5, SET_ENCRYPT_IV_ADDR_UPPER, UPPER, NvU64_HI32(iv_address));
|
||||
iv_address_lo32 = HWVALUE(C8B5, SET_ENCRYPT_IV_ADDR_LOWER, LOWER, NvU64_LO32(iv_address));
|
||||
|
||||
NV_PUSH_4U(C8B5, SET_ENCRYPT_AUTH_TAG_ADDR_UPPER, auth_tag_address_hi32,
|
||||
SET_ENCRYPT_AUTH_TAG_ADDR_LOWER, auth_tag_address_lo32,
|
||||
SET_ENCRYPT_IV_ADDR_UPPER, iv_address_hi32,
|
||||
SET_ENCRYPT_IV_ADDR_LOWER, iv_address_lo32);
|
||||
|
||||
encrypt_or_decrypt(push, dst, src, size);
|
||||
}
|
||||
|
||||
// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
|
||||
void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag)
|
||||
{
|
||||
|
||||
NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
|
||||
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
|
||||
|
||||
// The addressing mode (and aperture, if applicable) of the source and
|
||||
// authentication pointers should match. But unlike in the encryption case,
|
||||
// clients are not forced to pass a valid IV address.
|
||||
UVM_ASSERT(src.is_virtual == auth_tag.is_virtual);
|
||||
|
||||
if (!src.is_virtual) {
|
||||
UVM_ASSERT(src.aperture == UVM_APERTURE_SYS);
|
||||
UVM_ASSERT(auth_tag.aperture == UVM_APERTURE_SYS);
|
||||
}
|
||||
|
||||
if (!dst.is_virtual)
|
||||
UVM_ASSERT(dst.aperture == UVM_APERTURE_VID);
|
||||
|
||||
NV_PUSH_1U(C8B5, SET_SECURE_COPY_MODE, HWCONST(C8B5, SET_SECURE_COPY_MODE, MODE, DECRYPT));
|
||||
|
||||
auth_tag_address_hi32 = HWVALUE(C8B5, SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER, UPPER, NvU64_HI32(auth_tag.address));
|
||||
auth_tag_address_lo32 = HWVALUE(C8B5, SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER, LOWER, NvU64_LO32(auth_tag.address));
|
||||
|
||||
NV_PUSH_2U(C8B5, SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER, auth_tag_address_hi32,
|
||||
SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER, auth_tag_address_lo32);
|
||||
|
||||
encrypt_or_decrypt(push, dst, src, size);
|
||||
}
|
||||
|
||||
|
||||
@@ -391,10 +391,15 @@ void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, N
|
||||
*fifo_entry |= (NvU64)(HWCONST(C86F, GP_ENTRY1, OPCODE, SET_PB_SEGMENT_EXTENDED_BASE)) << 32;
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length)
|
||||
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
uvm_gpfifo_sync_t sync_flag)
|
||||
{
|
||||
NvU64 fifo_entry_value;
|
||||
NvU64 pb_low_bits_mask = (1ull << 40) - 1;
|
||||
const NvU32 sync_value = (sync_flag == UVM_GPFIFO_SYNC_WAIT) ? HWCONST(C86F, GP_ENTRY1, SYNC, WAIT) :
|
||||
HWCONST(C86F, GP_ENTRY1, SYNC, PROCEED);
|
||||
|
||||
UVM_ASSERT(!uvm_global_is_suspended());
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pushbuffer_va, 4), "pushbuffer va unaligned: %llu\n", pushbuffer_va);
|
||||
@@ -406,7 +411,8 @@ void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va
|
||||
pushbuffer_va &= pb_low_bits_mask;
|
||||
fifo_entry_value = HWVALUE(C86F, GP_ENTRY0, GET, NvU64_LO32(pushbuffer_va) >> 2);
|
||||
fifo_entry_value |= (NvU64)(HWVALUE(C86F, GP_ENTRY1, GET_HI, NvU64_HI32(pushbuffer_va)) |
|
||||
HWVALUE(C86F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2)) << 32;
|
||||
HWVALUE(C86F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2) |
|
||||
sync_value) << 32;
|
||||
|
||||
*fifo_entry = fifo_entry_value;
|
||||
}
|
||||
|
||||
213
kernel-open/nvidia-uvm/uvm_hopper_sec2.c
Normal file
@@ -0,0 +1,213 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2022-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_push_macros.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "clcba2.h"
|
||||
#include "clc86f.h"
|
||||
#include "clb06f.h"
|
||||
|
||||
#define UVM_CSL_SIGN_AUTH_TAG_ALIGNMENT_BYTES (1 << HWSHIFT(CBA2, METHOD_STREAM_AUTH_TAG_ADDR_LO, DATA))
|
||||
|
||||
static void sign_push(uvm_push_t *push, NvU32 *init_method, NvU8 *auth_tag)
|
||||
{
|
||||
NvU32 *sign_input_buf = push->begin + UVM_METHOD_SIZE / sizeof(*push->begin);
|
||||
NvU32 sign_size = 0;
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(init_method < push->next);
|
||||
|
||||
while (init_method < push->next) {
|
||||
NvU8 subch = READ_HWVALUE(*init_method, B06F, DMA, METHOD_SUBCHANNEL);
|
||||
NvU32 count = READ_HWVALUE(*init_method, B06F, DMA, METHOD_COUNT);
|
||||
|
||||
if (subch == UVM_SUBCHANNEL_CBA2) {
|
||||
NvU32 method_addr = READ_HWVALUE(*init_method, B06F, DMA, METHOD_ADDRESS) << 2;
|
||||
|
||||
UVM_ASSERT(count == 1);
|
||||
UVM_ASSERT((sign_size + 2) * UVM_METHOD_SIZE <= UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE);
|
||||
|
||||
sign_input_buf[sign_size] = method_addr;
|
||||
sign_input_buf[sign_size + 1] = init_method[1];
|
||||
|
||||
// We consume the method address and the method data from the input,
// so we advance sign_input_buf by 2.
|
||||
sign_size += 2;
|
||||
}
|
||||
|
||||
init_method += (count + 1);
|
||||
}
|
||||
|
||||
UVM_ASSERT(sign_size > 0);
|
||||
|
||||
status = nvUvmInterfaceCslSign(&push->channel->csl.ctx,
|
||||
sign_size * UVM_METHOD_SIZE,
|
||||
(NvU8 *)sign_input_buf,
|
||||
auth_tag);
|
||||
|
||||
UVM_ASSERT_MSG(status == NV_OK,
|
||||
"Failure to sign method stream auth tag, err: %s, GPU: %s.\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_sec2_init(uvm_push_t *push)
|
||||
{
|
||||
// Commonly, we would push a SET_OBJECT HOPPER_SEC2_WORK_LAUNCH_A in the
|
||||
// init function. During channel initialization, this method would be sent
|
||||
// to ESCHED to notify the expected SEC2 class ID. ESCHED forwards this
|
||||
// method to the SEC2 engine. SEC2 is not guaranteed to support the
|
||||
// SET_OBJECT method, so we shouldn't submit it.
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_sec2_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
NvU32 flush_value;
|
||||
NvU8 *sign_auth_tag_ptr;
|
||||
NvU32 sign_auth_tag_addr_lo;
|
||||
uvm_gpu_address_t sign_auth_tag_gpu_va;
|
||||
NvU32 *csl_sign_init = push->next;
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(NvU64_LO32(gpu_va), 1 << HWSHIFT(CBA2, SEMAPHORE_B, LOWER)));
|
||||
|
||||
sem_lo = READ_HWVALUE(NvU64_LO32(gpu_va), CBA2, SEMAPHORE_B, LOWER);
|
||||
|
||||
flush_value = uvm_hal_membar_before_semaphore(push) ? HWCONST(CBA2, SEMAPHORE_D, FLUSH_DISABLE, FALSE) :
|
||||
HWCONST(CBA2, SEMAPHORE_D, FLUSH_DISABLE, TRUE);
|
||||
|
||||
// The push and the method stream signature have the same lifetime, we
|
||||
// reserve space in the pushbuffer to store the signature. After the push is
|
    // processed by the GPU, the pushbuffer is recycled entirely, including the
    // signature buffer.
    sign_auth_tag_ptr = uvm_push_get_single_inline_buffer(push,
                                                          UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES,
                                                          UVM_CSL_SIGN_AUTH_TAG_ALIGNMENT_BYTES,
                                                          &sign_auth_tag_gpu_va);

    NV_PUSH_1U(CBA2,
               METHOD_STREAM_AUTH_TAG_ADDR_HI,
               HWVALUE(CBA2, METHOD_STREAM_AUTH_TAG_ADDR_HI, DATA, NvU64_HI32(sign_auth_tag_gpu_va.address)));

    sign_auth_tag_addr_lo = READ_HWVALUE(NvU64_LO32(sign_auth_tag_gpu_va.address),
                                         CBA2,
                                         METHOD_STREAM_AUTH_TAG_ADDR_LO,
                                         DATA);
    NV_PUSH_1U(CBA2,
               METHOD_STREAM_AUTH_TAG_ADDR_LO,
               HWVALUE(CBA2, METHOD_STREAM_AUTH_TAG_ADDR_LO, DATA, sign_auth_tag_addr_lo));

    NV_PUSH_1U(CBA2, SEMAPHORE_A, HWVALUE(CBA2, SEMAPHORE_A, UPPER, NvU64_HI32(gpu_va)));
    NV_PUSH_1U(CBA2, SEMAPHORE_B, HWVALUE(CBA2, SEMAPHORE_B, LOWER, sem_lo));
    NV_PUSH_1U(CBA2, SET_SEMAPHORE_PAYLOAD_LOWER, payload);

    NV_PUSH_1U(CBA2, SEMAPHORE_D, HWCONST(CBA2, SEMAPHORE_D, TIMESTAMP, DISABLE) |
                                  HWCONST(CBA2, SEMAPHORE_D, PAYLOAD_SIZE, 32_BIT) |
                                  flush_value);

    sign_push(push, csl_sign_init, sign_auth_tag_ptr);
}

void uvm_hal_hopper_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va)
{
    // TODO: Bug 3804752: [uvm][HCC] Add support for Hopper SEC2 HAL in UVM.
    // Semaphore_timestamp is not implemented in the SEC2 engine yet. We will
    // add support in UVM when they become available.
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    UVM_ASSERT_MSG(false, "SEC2 semaphore_timestamp is not supported on GPU: %s.\n", uvm_gpu_name(gpu));
}

static void execute_with_membar(uvm_push_t *push)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);

    NvU32 flush_value = (membar == UVM_MEMBAR_SYS) ? HWCONST(CBA2, EXECUTE, FLUSH_DISABLE, FALSE) :
                                                     HWCONST(CBA2, EXECUTE, FLUSH_DISABLE, TRUE);

    NV_PUSH_1U(CBA2, EXECUTE, flush_value |
                              HWCONST(CBA2, EXECUTE, NOTIFY, DISABLE));

    if (membar == UVM_MEMBAR_GPU) {
        gpu->parent->host_hal->wait_for_idle(push);
        gpu->parent->host_hal->membar_gpu(push);
    }
}

void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va)
{
    NvU8 *sign_auth_tag_ptr;
    NvU32 sign_auth_tag_addr_lo;
    uvm_gpu_address_t sign_auth_tag_gpu_va;
    NvU32 *csl_sign_init = push->next;

    // Check that the provided alignment matches HW
    BUILD_BUG_ON(UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT != (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
    BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT < (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
    BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT % (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)) != 0);

    // No overlapping.
    UVM_ASSERT(!uvm_ranges_overlap(src_va, src_va + size - 1, dst_va, dst_va + size - 1));

    // Alignment requirements.
    UVM_ASSERT(IS_ALIGNED(NvU64_LO32(src_va), 1 << HWSHIFT(CBA2, DECRYPT_COPY_SRC_ADDR_LO, DATA)));
    UVM_ASSERT(IS_ALIGNED(NvU64_LO32(dst_va), 1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
    UVM_ASSERT(IS_ALIGNED(NvU64_LO32(auth_tag_va), 1 << HWSHIFT(CBA2, DECRYPT_COPY_AUTH_TAG_ADDR_LO, DATA)));
    UVM_ASSERT(IS_ALIGNED(size, 1 << HWSHIFT(CBA2, DECRYPT_COPY_SIZE, DATA)));

    // See comments in SEC2 semaphore_release.
    sign_auth_tag_ptr = uvm_push_get_single_inline_buffer(push,
                                                          UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES,
                                                          UVM_CSL_SIGN_AUTH_TAG_ALIGNMENT_BYTES,
                                                          &sign_auth_tag_gpu_va);

    NV_PUSH_1U(CBA2, DECRYPT_COPY_SRC_ADDR_HI, NvU64_HI32(src_va));
    NV_PUSH_1U(CBA2, DECRYPT_COPY_SRC_ADDR_LO, NvU64_LO32(src_va));

    NV_PUSH_1U(CBA2, DECRYPT_COPY_DST_ADDR_HI, NvU64_HI32(dst_va));
    NV_PUSH_1U(CBA2, DECRYPT_COPY_DST_ADDR_LO, NvU64_LO32(dst_va));

    NV_PUSH_1U(CBA2, DECRYPT_COPY_SIZE, size);
    NV_PUSH_1U(CBA2, DECRYPT_COPY_AUTH_TAG_ADDR_HI, NvU64_HI32(auth_tag_va));
    NV_PUSH_1U(CBA2, DECRYPT_COPY_AUTH_TAG_ADDR_LO, NvU64_LO32(auth_tag_va));

    NV_PUSH_1U(CBA2,
               METHOD_STREAM_AUTH_TAG_ADDR_HI,
               HWVALUE(CBA2, METHOD_STREAM_AUTH_TAG_ADDR_HI, DATA, NvU64_HI32(sign_auth_tag_gpu_va.address)));

    sign_auth_tag_addr_lo = READ_HWVALUE(NvU64_LO32(sign_auth_tag_gpu_va.address),
                                         CBA2,
                                         METHOD_STREAM_AUTH_TAG_ADDR_LO,
                                         DATA);
    NV_PUSH_1U(CBA2,
               METHOD_STREAM_AUTH_TAG_ADDR_LO,
               HWVALUE(CBA2, METHOD_STREAM_AUTH_TAG_ADDR_LO, DATA, sign_auth_tag_addr_lo));

    execute_with_membar(push);

    sign_push(push, csl_sign_init, sign_auth_tag_ptr);
}
@@ -1048,6 +1048,41 @@ typedef struct
    NV_STATUS rmStatus; // OUT
} UVM_MAP_EXTERNAL_SPARSE_PARAMS;

//
// Used to initialize a secondary UVM file descriptor which holds a
// reference on the memory map to prevent it from being torn down without
// first notifying UVM. This is achieved by preventing mmap() calls on
// the secondary file descriptor, so that on process exit
// uvm_mm_release() will be called while the memory map is still present,
// allowing UVM to cleanly shut down the GPU by handling faults
// instead of cancelling them.
//
// This ioctl must be called after the primary file descriptor has
// been initialized with the UVM_INITIALIZE ioctl. The primary FD
// should be passed in the uvmFd field, and the UVM_MM_INITIALIZE ioctl
// will hold a reference on the primary FD. Therefore uvm_release() is
// guaranteed to be called after uvm_mm_release().
//
// Once this file descriptor has been closed, the UVM context is
// effectively dead and subsequent operations requiring a memory map
// will fail. Calling UVM_MM_INITIALIZE on a context that has already
// been initialized via any FD will return NV_ERR_INVALID_STATE.
//
// Calling this with a non-UVM file descriptor in uvmFd will return
// NV_ERR_INVALID_ARGUMENT. Calling this on the same file descriptor
// as UVM_INITIALIZE, or more than once on the same FD, will return
// NV_ERR_IN_USE.
//
// Not all platforms require this secondary file descriptor. On those
// platforms NV_WARN_NOTHING_TO_DO will be returned and users may
// close the file descriptor at any time.
#define UVM_MM_INITIALIZE UVM_IOCTL_BASE(75)
typedef struct
{
    NvS32 uvmFd;        // IN
    NV_STATUS rmStatus; // OUT
} UVM_MM_INITIALIZE_PARAMS;
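
// As a concrete illustration of the call order described above, the following
// user-space sketch opens two UVM file descriptors and ties them together with
// UVM_MM_INITIALIZE. It is an assumption-laden example, not part of the UVM
// sources: the device path and the uvm_initialize_fd() helper (a stand-in for
// issuing UVM_INITIALIZE on the primary FD) are hypothetical, and real
// applications normally go through the UVM user-mode driver rather than
// issuing these ioctls directly.
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int uvm_mm_initialize_example(void)
{
    UVM_MM_INITIALIZE_PARAMS params = {0};

    // Primary FD: must have been set up with UVM_INITIALIZE first.
    int primary_fd = open("/dev/nvidia-uvm", O_RDWR | O_CLOEXEC);   // assumed device path
    // Secondary FD: only pins the memory map; mmap() on it is rejected.
    int secondary_fd = open("/dev/nvidia-uvm", O_RDWR | O_CLOEXEC);

    if (primary_fd < 0 || secondary_fd < 0)
        return -1;

    uvm_initialize_fd(primary_fd);   // hypothetical UVM_INITIALIZE wrapper

    params.uvmFd = primary_fd;
    if (ioctl(secondary_fd, UVM_MM_INITIALIZE, &params) != 0) {
        fprintf(stderr, "UVM_MM_INITIALIZE ioctl failed\n");
        close(secondary_fd);
        close(primary_fd);
        return -1;
    }

    // rmStatus is NV_OK on success, NV_WARN_NOTHING_TO_DO on platforms that do
    // not need the secondary FD (it may then be closed at any time), or one of
    // the NV_ERR_* codes documented above.
    return (params.rmStatus == NV_OK || params.rmStatus == NV_WARN_NOTHING_TO_DO) ? 0 : -1;
}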

//
// Temporary ioctls which should be removed before UVM 8 release
// Number backwards from 2047 - highest custom ioctl function number

@@ -153,6 +153,10 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define VM_MIXEDMAP 0x00000000
#endif

#if !defined(MPOL_PREFERRED_MANY)
#define MPOL_PREFERRED_MANY 5
#endif

//
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
// routines pick up our version
@@ -540,6 +544,7 @@ typedef struct
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg

#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#include <asm/pgtable.h>
#include <asm/pgtable_types.h>
#endif

@@ -547,6 +552,27 @@ typedef struct
#define PAGE_KERNEL_NOENC PAGE_KERNEL
#endif

// uvm_pgprot_decrypted is a GPL-aware version of pgprot_decrypted that returns
// the given input when UVM cannot use GPL symbols, or pgprot_decrypted is not
// defined. Otherwise, the function is equivalent to pgprot_decrypted. UVM only
// depends on pgprot_decrypted when the driver is allowed to use GPL symbols:
// both AMD's SEV and Intel's TDX are only supported in conjunction with OpenRM.
//
// It is safe to invoke uvm_pgprot_decrypted in KVM + AMD SEV-SNP guests, even
// if the call is not required, because pgprot_decrypted(PAGE_KERNEL_NOENC) ==
// PAGE_KERNEL_NOENC.
//
// pgprot_decrypted was added by commit 21729f81ce8a ("x86/mm: Provide general
// kernel support for memory encryption") in v4.14 (2017-07-18)
static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
{
#if defined(pgprot_decrypted)
    return pgprot_decrypted(prot);
#endif

    return prot;
}
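
// As a usage illustration, the sketch below mirrors the CPU kernel-mapping
// path later in this change (mem_map_cpu_to_sysmem_kernel), where DMA sysmem
// is mapped decrypted when the Confidential Computing feature is enabled. It
// is a hypothetical, condensed example: the surrounding function, its
// arguments, and the page gathering are assumptions; only the pgprot selection
// and the vmap() call reflect the actual usage. Assumes <linux/vmalloc.h>.
static void *example_map_dma_pages_decrypted(struct page **pages,
                                             unsigned int num_pages,
                                             bool conf_computing_enabled)
{
    pgprot_t prot = PAGE_KERNEL;

    // Unprotected (DMA-visible) sysmem must be mapped decrypted when the
    // Confidential Computing feature is enabled. On kernels without
    // pgprot_decrypted(), uvm_pgprot_decrypted() returns its input unchanged.
    if (conf_computing_enabled)
        prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);

    return vmap(pages, num_pages, VM_MAP, prot);
}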

// Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7).
//
// Archs with CONFIG_MMU should have their own page.h, and can't include

@@ -27,7 +27,7 @@

const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 26);
    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 33);

    switch (lock_order) {
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@@ -43,18 +43,25 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_API);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_GPUS);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_BLOCK_MIGRATE);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_BLOCK);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_WLC_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_SEC2_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_WLC_CHANNEL);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_EVENTS);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_TOOLS);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SEMA_POOL_TRACKER);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SECURE_SEMAPHORE);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_LEAF);
        UVM_ENUM_STRING_DEFAULT();
    }
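
// The lock orders above are only useful together with a runtime check that
// locks are acquired in strictly increasing order. The sketch below is NOT UVM
// code; it is a minimal, generic illustration of that idea in plain C,
// assuming a single thread of execution (UVM's real tracking in uvm_lock.h is
// per-thread and also records lock modes, not just orders).
#include <assert.h>

typedef struct {
    int order;    // e.g. a value of an enum like uvm_lock_order_t
    int locked;
} example_ordered_lock_t;

static int g_highest_order_held = -1;    // would be per-thread state in practice

// Returns the previous highest order, to be passed back to example_unlock().
static int example_lock(example_ordered_lock_t *lock)
{
    int previous = g_highest_order_held;

    // Acquiring a lock whose order is not strictly higher than everything
    // already held is a lock-order violation and a potential deadlock.
    assert(lock->order > g_highest_order_held);

    lock->locked = 1;
    g_highest_order_held = lock->order;
    return previous;
}

static void example_unlock(example_ordered_lock_t *lock, int previous)
{
    assert(lock->locked);
    lock->locked = 0;
    g_highest_order_held = previous;
}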
|
||||
|
||||
@@ -279,6 +279,15 @@
|
||||
// Operations not allowed while holding the lock:
|
||||
// - GPU memory allocation which can evict memory (would require nesting
|
||||
// block locks)
|
||||
//
|
||||
// - GPU DMA Allocation pool lock (gpu->conf_computing.dma_buffer_pool.lock)
|
||||
// Order: UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
// Exclusive lock (mutex)
|
||||
//
|
||||
// Protects:
|
||||
// - Protect the state of the uvm_conf_computing_dma_buffer_pool_t
|
||||
//
|
||||
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
|
||||
// gpu->sysmem_mappings.bitlock)
|
||||
// Order: UVM_LOCK_ORDER_CHUNK_MAPPING
|
||||
@@ -313,6 +322,62 @@
|
||||
// Operations not allowed while holding this lock
|
||||
// - GPU memory allocation which can evict
|
||||
//
|
||||
// - CE channel CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_CSL_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
// Semaphore per CE channel pool
|
||||
//
|
||||
// The semaphore controls concurrent pushes to CE channels that are not WCL
|
||||
// channels. Secure work submission depends on channel availability in
|
||||
// GPFIFO entries (as in any other channel type) but also on channel
|
||||
// locking. Each channel has a lock to enforce ordering of pushes. The
|
||||
// channel's CSL lock is taken on channel reservation until uvm_push_end.
|
||||
// When the Confidential Computing feature is enabled, channels are
|
||||
// stateful, and the CSL lock protects their CSL state/context.
|
||||
//
|
||||
// Operations allowed while holding this lock
|
||||
// - Pushing work to CE channels (except for WLC channels)
|
||||
//
|
||||
// - WLC CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
// Semaphore per WLC channel pool
|
||||
//
|
||||
// The semaphore controls concurrent pushes to WLC channels. WLC work
|
||||
// submission depends on channel availability in GPFIFO entries (as in any
|
||||
// other channel type) but also on channel locking. Each WLC channel has a
|
||||
// lock to enforce ordering of pushes. The channel's CSL lock is taken on
|
||||
// channel reservation until uvm_push_end. SEC2 channels are stateful
|
||||
// channels and the CSL lock protects their CSL state/context.
|
||||
//
|
||||
// This lock ORDER is different and sits below the generic channel CSL
|
||||
// lock and above the SEC2 CSL lock. This reflects the dual nature of WLC
|
||||
// channels; they use SEC2 indirect work launch during initialization,
|
||||
// and after their schedule is initialized they provide indirect launch
|
||||
// functionality to other CE channels.
|
||||
//
|
||||
// Operations allowed while holding this lock
|
||||
// - Pushing work to WLC channels
|
||||
//
|
||||
// - SEC2 CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_SEC2_CSL_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
// Semaphore per SEC2 channel pool
|
||||
//
|
||||
// The semaphore controls concurrent pushes to SEC2 channels. SEC2 work
|
||||
// submission depends on channel availability in GPFIFO entries (as in any
|
||||
// other channel type) but also on channel locking. Each SEC2 channel has a
|
||||
// lock to enforce ordering of pushes. The channel's CSL lock is taken on
|
||||
// channel reservation until uvm_push_end. SEC2 channels are stateful
|
||||
// channels and the CSL lock protects their CSL state/context.
|
||||
//
|
||||
// This lock ORDER is different and lower than UVM_LOCK_ORDER_CSL_PUSH
|
||||
// to allow secure work submission to use a SEC2 channel to submit
|
||||
// work before releasing the CSL lock of the originating channel.
|
||||
//
|
||||
// Operations allowed while holding this lock
|
||||
// - Pushing work to SEC2 channels
|
||||
//
|
||||
// - Concurrent push semaphore
|
||||
// Order: UVM_LOCK_ORDER_PUSH
|
||||
// Semaphore (uvm_semaphore_t)
|
||||
@@ -346,9 +411,20 @@
|
||||
// channel pool lock documentation contains the guidelines about which lock
|
||||
// type (mutex or spinlock) to use.
|
||||
//
|
||||
// - WLC Channel lock
|
||||
// Order: UVM_LOCK_ORDER_WLC_CHANNEL
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
// Spinlock (uvm_spinlock_t)
|
||||
//
|
||||
// Lock protecting the state of WLC channels in a channel pool. This lock
|
||||
// is separate from the generic channel lock (UVM_LOCK_ORDER_CHANNEL)
|
||||
// to allow for indirect worklaunch pushes while holding the main channel
|
||||
// lock (WLC pushes don't need any of the pushbuffer locks described
|
||||
// above)
|
||||
//
|
||||
// - Tools global VA space list lock (g_tools_va_space_list_lock)
|
||||
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
|
||||
// Reader/writer lock (rw_sempahore)
|
||||
// Reader/writer lock (rw_semaphore)
|
||||
//
|
||||
// This lock protects the list of VA spaces used when broadcasting
|
||||
// UVM profiling events.
|
||||
@@ -366,6 +442,13 @@
|
||||
// events come from perf events, both VA_SPACE_EVENTS and VA_SPACE_TOOLS
|
||||
// must be taken to register/report some tools events.
|
||||
//
|
||||
// - Tracking semaphores
|
||||
// Order: UVM_LOCK_ORDER_SECURE_SEMAPHORE
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
//
|
||||
// CE semaphore payloads are encrypted, and require to take the CSL lock
|
||||
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
|
||||
//
|
||||
// - Leaf locks
|
||||
// Order: UVM_LOCK_ORDER_LEAF
|
||||
//
|
||||
@@ -390,18 +473,25 @@ typedef enum
|
||||
UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL,
|
||||
UVM_LOCK_ORDER_RM_API,
|
||||
UVM_LOCK_ORDER_RM_GPUS,
|
||||
UVM_LOCK_ORDER_VA_BLOCK_MIGRATE,
|
||||
UVM_LOCK_ORDER_VA_BLOCK,
|
||||
UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
|
||||
UVM_LOCK_ORDER_CHUNK_MAPPING,
|
||||
UVM_LOCK_ORDER_PAGE_TREE,
|
||||
UVM_LOCK_ORDER_CSL_PUSH,
|
||||
UVM_LOCK_ORDER_CSL_WLC_PUSH,
|
||||
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
|
||||
UVM_LOCK_ORDER_PUSH,
|
||||
UVM_LOCK_ORDER_PMM,
|
||||
UVM_LOCK_ORDER_PMM_PMA,
|
||||
UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
|
||||
UVM_LOCK_ORDER_CHANNEL,
|
||||
UVM_LOCK_ORDER_WLC_CHANNEL,
|
||||
UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,
|
||||
UVM_LOCK_ORDER_VA_SPACE_EVENTS,
|
||||
UVM_LOCK_ORDER_VA_SPACE_TOOLS,
|
||||
UVM_LOCK_ORDER_SEMA_POOL_TRACKER,
|
||||
UVM_LOCK_ORDER_SECURE_SEMAPHORE,
|
||||
UVM_LOCK_ORDER_LEAF,
|
||||
UVM_LOCK_ORDER_COUNT,
|
||||
} uvm_lock_order_t;
|
||||
@@ -914,9 +1004,10 @@ typedef struct
|
||||
// be the same as the string passed to "spinlock".
|
||||
// See uvm_spin_lock() and uvm_spin_unlock() below as examples.
|
||||
//
|
||||
#define uvm_assert_spinlock_locked(spinlock) ({ \
|
||||
typeof(spinlock) _lock_ = (spinlock); \
|
||||
UVM_ASSERT(spin_is_locked(&_lock_->lock) && uvm_check_locked(_lock_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
|
||||
#define uvm_assert_spinlock_locked(spinlock) ({ \
|
||||
typeof(spinlock) _lock_ = (spinlock); \
|
||||
UVM_ASSERT(spin_is_locked(&_lock_->lock)); \
|
||||
UVM_ASSERT(uvm_check_locked(_lock_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
|
||||
})
|
||||
|
||||
#define uvm_assert_spinlock_unlocked(spinlock) UVM_ASSERT(!spin_is_locked(&(spinlock)->lock))
|
||||
|
||||
@@ -33,11 +33,13 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// space for UVM internal mappings.
|
||||
// A single top level PDE covers 64 or 128 MB on Maxwell so 128 GB is fine to use.
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1GB;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 768ull * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_base = 768 * UVM_SIZE_1GB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
|
||||
// We don't have a compelling use case in UVM-Lite for direct peer
|
||||
// migrations between GPUs, so don't bother setting them up.
|
||||
parent_gpu->peer_copy_mode = UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -29,10 +29,10 @@
|
||||
void uvm_hal_maxwell_ce_init(uvm_push_t *push)
|
||||
{
|
||||
// Notably this sends SET_OBJECT with the CE class on subchannel 0 instead
|
||||
// of the recommended by HW subchannel 4 (subchannel 4 is recommended to
|
||||
// of the recommended by HW subchannel 4 (subchannel 4 is required to
|
||||
// match CE usage on GRCE). For the UVM driver using subchannel 0 has the
|
||||
// benefit of also verifying that we ended up on the right PBDMA though as
|
||||
// SET_OBJECT with CE class on subchannel 0 would fail on GRCE.
|
||||
// benefit of also verifying that we ended up on the right CE engine type
|
||||
// though as SET_OBJECT with CE class on subchannel 0 would fail on GRCE.
|
||||
NV_PUSH_1U(B06F, SET_OBJECT, uvm_push_get_gpu(push)->parent->rm_info.ceClass);
|
||||
}
|
||||
|
||||
@@ -185,6 +185,12 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Noop, since COPY_TYPE doesn't exist in Maxwell.
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size)
|
||||
{
|
||||
// If >4GB copies ever become an important use case, this function should
|
||||
@@ -195,6 +201,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
NvU32 pipelined_value;
|
||||
NvU32 launch_dma_src_dst_type;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
NvU32 copy_type_value;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_is_valid(push, dst, src),
|
||||
"Memcopy validation failed in channel %s, GPU %s.\n",
|
||||
@@ -205,6 +212,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
|
||||
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
copy_type_value = gpu->parent->ce_hal->memcopy_copy_type(push, dst, src);
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
@@ -226,6 +234,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
|
||||
launch_dma_src_dst_type |
|
||||
launch_dma_plc_mode |
|
||||
copy_type_value |
|
||||
pipelined_value);
|
||||
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
@@ -266,7 +275,7 @@ static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size,
|
||||
NvU32 launch_dma_dst_type;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, memset_element_size),
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, size, memset_element_size),
|
||||
"Memset validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
@@ -352,3 +361,24 @@ void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value,
|
||||
uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_ce_encrypt_unsupported(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(false, "CE encrypt is not supported on GPU: %s.\n", uvm_gpu_name(gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_ce_decrypt_unsupported(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
uvm_gpu_address_t src,
|
||||
NvU32 size,
|
||||
uvm_gpu_address_t auth_tag)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(false, "CE decrypt is not supported on GPU: %s.\n", uvm_gpu_name(gpu));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -62,11 +62,18 @@ NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm
|
||||
return 0;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_get_fault_type is not supported.\n");
|
||||
return UVM_FAULT_TYPE_COUNT;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
|
||||
@@ -217,9 +217,14 @@ void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
HWCONST(A16F, SEMAPHORED, RELEASE_WFI, DIS));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length)
|
||||
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
uvm_gpfifo_sync_t sync_flag)
|
||||
{
|
||||
NvU64 fifo_entry_value;
|
||||
const NvU32 sync_value = (sync_flag == UVM_GPFIFO_SYNC_WAIT) ? HWCONST(A16F, GP_ENTRY1, SYNC, WAIT) :
|
||||
HWCONST(A16F, GP_ENTRY1, SYNC, PROCEED);
|
||||
|
||||
UVM_ASSERT(!uvm_global_is_suspended());
|
||||
UVM_ASSERT_MSG(pushbuffer_va % 4 == 0, "pushbuffer va unaligned: %llu\n", pushbuffer_va);
|
||||
@@ -228,7 +233,8 @@ void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_v
|
||||
fifo_entry_value = HWVALUE(A16F, GP_ENTRY0, GET, NvU64_LO32(pushbuffer_va) >> 2);
|
||||
fifo_entry_value |= (NvU64)(HWVALUE(A16F, GP_ENTRY1, GET_HI, NvU64_HI32(pushbuffer_va)) |
|
||||
HWVALUE(A16F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2) |
|
||||
HWCONST(A16F, GP_ENTRY1, PRIV, KERNEL)) << 32;
|
||||
HWCONST(A16F, GP_ENTRY1, PRIV, KERNEL) |
|
||||
sync_value) << 32;
|
||||
|
||||
*fifo_entry = fifo_entry_value;
|
||||
}
|
||||
|
||||
53
kernel-open/nvidia-uvm/uvm_maxwell_sec2.c
Normal file
@@ -0,0 +1,53 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_common.h"
|
||||
|
||||
void uvm_hal_maxwell_sec2_init_noop(uvm_push_t *push)
|
||||
{
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_sec2_semaphore_release_unsupported(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
UVM_ASSERT_MSG(false, "SEC2 semaphore_release is not supported on GPU: %s.\n", uvm_gpu_name(gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
UVM_ASSERT_MSG(false, "SEC2 semaphore_timestamp is not supported on GPU: %s.\n", uvm_gpu_name(gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_sec2_decrypt_unsupported(uvm_push_t *push,
|
||||
NvU64 dst_va,
|
||||
NvU64 src_va,
|
||||
NvU32 size,
|
||||
NvU64 auth_tag_va)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
UVM_ASSERT_MSG(false, "SEC2 decrypt is not supported on GPU: %s.\n", uvm_gpu_name(gpu));
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -22,6 +22,7 @@
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "uvm_mmu.h"
|
||||
#include "uvm_processors.h"
|
||||
#include "uvm_va_space.h"
|
||||
@@ -67,26 +68,15 @@ static bool vidmem_can_be_mapped(uvm_mem_t *vidmem, bool is_user_space)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool sysmem_can_be_mapped(uvm_mem_t *sysmem)
|
||||
{
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
|
||||
|
||||
// If SEV is enabled, only unprotected memory can be mapped
|
||||
if (g_uvm_global.sev_enabled)
|
||||
return uvm_mem_is_sysmem_dma(sysmem);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool mem_can_be_mapped_on_cpu(uvm_mem_t *mem, bool is_user_space)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem))
|
||||
return sysmem_can_be_mapped(mem);
|
||||
return true;
|
||||
|
||||
if (!vidmem_can_be_mapped(mem, is_user_space))
|
||||
return false;
|
||||
|
||||
return mem->backing_gpu->parent->numa_info.enabled && PAGE_ALIGNED(mem->chunk_size);
|
||||
return mem->backing_gpu->mem_info.numa.enabled && PAGE_ALIGNED(mem->chunk_size);
|
||||
}
|
||||
|
||||
static bool mem_can_be_mapped_on_cpu_kernel(uvm_mem_t *mem)
|
||||
@@ -99,10 +89,22 @@ static bool mem_can_be_mapped_on_cpu_user(uvm_mem_t *mem)
|
||||
return mem_can_be_mapped_on_cpu(mem, true);
|
||||
}
|
||||
|
||||
static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
|
||||
{
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
|
||||
|
||||
// In Confidential Computing, only unprotected memory can be mapped on the
|
||||
// GPU
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return uvm_mem_is_sysmem_dma(sysmem);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool mem_can_be_mapped_on_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, bool is_user_space)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem))
|
||||
return sysmem_can_be_mapped(mem);
|
||||
return sysmem_can_be_mapped_on_gpu(mem);
|
||||
|
||||
if (!vidmem_can_be_mapped(mem, is_user_space))
|
||||
return false;
|
||||
@@ -312,7 +314,7 @@ static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
|
||||
// When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
|
||||
// chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
|
||||
if (mem->backing_gpu->parent->numa_info.enabled)
|
||||
if (mem->backing_gpu->mem_info.numa.enabled)
|
||||
chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);
|
||||
|
||||
return chunk_size;
|
||||
@@ -449,6 +451,9 @@ static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
|
||||
return gfp_flags;
|
||||
}
|
||||
|
||||
// This allocation is a non-protected memory allocation under Confidential
|
||||
// Computing.
|
||||
//
|
||||
// There is a tighter coupling between allocation and mapping because of the
|
||||
// allocator UVM must use. Hence, this function does the equivalent of
|
||||
// uvm_mem_map_gpu_phys().
|
||||
@@ -523,9 +528,10 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_protected)
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_pmm_gpu_memory_type_t mem_type;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
@@ -542,14 +548,23 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_prot
|
||||
if (!mem->vidmem.chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
// When CC is disabled the behavior is identical to that of PMM, and the
|
||||
// protection flag is ignored (squashed by PMM internally).
|
||||
if (is_unprotected)
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
|
||||
else
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
|
||||
|
||||
status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
mem_type,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("pmm_gpu_alloc(count=%zd, size=0x%x) failed: %s\n",
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
nvstatusToString(status));
|
||||
@@ -559,7 +574,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_prot
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_protected)
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem)) {
|
||||
gfp_t gfp_flags;
|
||||
@@ -581,7 +596,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
return status;
|
||||
}
|
||||
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_protected);
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_global_processor_mask_t *mask)
|
||||
@@ -611,7 +626,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
NV_STATUS status;
|
||||
NvU64 physical_size;
|
||||
uvm_mem_t *mem = NULL;
|
||||
bool is_protected = false;
|
||||
bool is_unprotected = false;
|
||||
|
||||
UVM_ASSERT(params->size > 0);
|
||||
|
||||
@@ -633,7 +648,12 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = physical_size / mem->chunk_size;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_protected);
|
||||
if (params->is_unprotected)
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
is_unprotected = params->is_unprotected;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@@ -718,8 +738,8 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
|
||||
pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (g_uvm_global.sev_enabled)
|
||||
prot = PAGE_KERNEL_NOENC;
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
|
||||
prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
|
||||
|
||||
mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);
|
||||
|
||||
@@ -982,7 +1002,7 @@ uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_
|
||||
UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));
|
||||
|
||||
if (uvm_mem_is_sysmem(mem) || uvm_mem_is_local_vidmem(mem, accessing_gpu))
|
||||
return uvm_mem_gpu_address_physical(mem, accessing_gpu, offset, size);
|
||||
return uvm_gpu_address_copy(accessing_gpu, uvm_mem_gpu_physical(mem, accessing_gpu, offset, size));
|
||||
|
||||
// Peer GPUs may need to use some form of translation (identity mappings,
|
||||
// indirect peers) to copy.
|
||||
@@ -1041,6 +1061,16 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
|
||||
|
||||
// When the Confidential Computing feature is enabled, DMA allocations are
|
||||
// majoritarily allocated and managed by a per-GPU DMA buffer pool
|
||||
// (uvm_conf_computing_dma_buffer_pool_t). Because we would typically
|
||||
// already hold the DMA_BUFFER_POOL lock at this time, we cannot hold
|
||||
// the block lock. Allocate PTEs without eviction in this context.
|
||||
//
|
||||
// See uvm_pmm_gpu_alloc()
|
||||
if (uvm_mem_is_sysmem_dma(mem))
|
||||
pmm_flags = UVM_PMM_ALLOC_FLAGS_NONE;
|
||||
|
||||
status = uvm_page_table_range_vec_create(tree,
|
||||
gpu_va,
|
||||
uvm_mem_physical_size(mem),
|
||||
@@ -1205,7 +1235,7 @@ void uvm_mem_unmap_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
static bool mem_can_be_phys_mapped_on_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem))
|
||||
return sysmem_can_be_mapped(mem);
|
||||
return sysmem_can_be_mapped_on_gpu(mem);
|
||||
else
|
||||
return uvm_mem_is_local_vidmem(mem, gpu);
|
||||
}
|
||||
@@ -1306,10 +1336,16 @@ NvU64 uvm_mem_get_gpu_va_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
|
||||
uvm_gpu_address_t uvm_mem_gpu_address_virtual_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
{
|
||||
return uvm_gpu_address_virtual(uvm_mem_get_gpu_va_kernel(mem, gpu));
|
||||
uvm_gpu_address_t addr = uvm_gpu_address_virtual(uvm_mem_get_gpu_va_kernel(mem, gpu));
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && mem->dma_owner)
|
||||
addr.is_unprotected = true;
|
||||
return addr;
|
||||
}
|
||||
|
||||
uvm_gpu_address_t uvm_mem_gpu_address_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
|
||||
{
|
||||
return uvm_gpu_address_from_phys(uvm_mem_gpu_physical(mem, gpu, offset, size));
|
||||
uvm_gpu_address_t addr = uvm_gpu_address_from_phys(uvm_mem_gpu_physical(mem, gpu, offset, size));
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && mem->dma_owner)
|
||||
addr.is_unprotected = true;
|
||||
return addr;
|
||||
}
|
||||
|
||||