Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
(synced 2026-01-27 11:39:46 +00:00)

Compare commits: 570.153.02...575.64.03 (5 commits)

1893c6c8fd
fade1f7b20
30e15d79de
e00332b05f
4159579888
README.md (35 lines changed)

@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source

 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 570.133.20.
+version 575.64.03.


 ## How to Build

@@ -17,7 +17,7 @@ as root:
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-570.133.20 driver release. This can be achieved by installing
+575.64.03 driver release. This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option. E.g.,

@@ -185,7 +185,7 @@ table below).
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:

-https://us.download.nvidia.com/XFree86/Linux-x86_64/570.133.20/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/575.64.03/README/kernel_open.html

 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.

@@ -350,6 +350,7 @@ Subsystem Device ID.
 | Quadro RTX 4000 with Max-Q Design | 1EB6 1462 12C6 |
 | Quadro RTX 4000 with Max-Q Design | 1EB6 17AA 22B8 |
 | Quadro RTX 4000 with Max-Q Design | 1EB6 17AA 22BA |
+| Tesla T4 | 1EB8 10DE 12A2 |
 | NVIDIA GeForce RTX 2070 SUPER | 1EC2 |
 | NVIDIA GeForce RTX 2070 SUPER | 1EC7 |
 | NVIDIA GeForce RTX 2080 | 1ED0 |

@@ -954,16 +955,44 @@ Subsystem Device ID.
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 103C 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 1028 204D |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 103C 204D |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 10DE 204D |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 17AA 204D |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 17AA 204C |
| NVIDIA RTX PRO 6000 Blackwell Server Edition | 2BB5 10DE 204E |
| NVIDIA GeForce RTX 5080 | 2C02 |
| NVIDIA GeForce RTX 5070 Ti | 2C05 |
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C18 |
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C19 |
| NVIDIA RTX PRO 4500 Blackwell | 2C31 1028 2051 |
| NVIDIA RTX PRO 4500 Blackwell | 2C31 103C 2051 |
| NVIDIA RTX PRO 4500 Blackwell | 2C31 10DE 2051 |
| NVIDIA RTX PRO 4500 Blackwell | 2C31 17AA 2051 |
| NVIDIA RTX PRO 4000 Blackwell | 2C34 1028 2052 |
| NVIDIA RTX PRO 4000 Blackwell | 2C34 103C 2052 |
| NVIDIA RTX PRO 4000 Blackwell | 2C34 10DE 2052 |
| NVIDIA RTX PRO 4000 Blackwell | 2C34 17AA 2052 |
| NVIDIA RTX PRO 5000 Blackwell Generation Laptop GPU | 2C38 |
| NVIDIA RTX PRO 4000 Blackwell Generation Laptop GPU | 2C39 |
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 |
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 |
| NVIDIA GeForce RTX 5060 Ti | 2D04 |
| NVIDIA GeForce RTX 5060 | 2D05 |
| NVIDIA GeForce RTX 5070 Laptop GPU | 2D18 |
| NVIDIA GeForce RTX 5060 Laptop GPU | 2D19 |
| NVIDIA RTX PRO 2000 Blackwell Generation Laptop GPU | 2D39 |
| NVIDIA GeForce RTX 5070 Laptop GPU | 2D58 |
| NVIDIA GeForce RTX 5060 Laptop GPU | 2D59 |
| NVIDIA GeForce RTX 5050 | 2D83 |
| NVIDIA GeForce RTX 5050 Laptop GPU | 2D98 |
| NVIDIA RTX PRO 1000 Blackwell Generation Laptop GPU | 2DB8 |
| NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 |
| NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 |
| NVIDIA GeForce RTX 5070 | 2F04 |
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
| NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 |
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 |
@@ -75,21 +75,14 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
 $(eval include $(src)/$(_module)/$(_module).Kbuild))

#
# Define CFLAGS that apply to all the NVIDIA kernel modules. EXTRA_CFLAGS
# is deprecated since 2.6.24 in favor of ccflags-y, but we need to support
# older kernels which do not have ccflags-y. Newer kernels append
# $(EXTRA_CFLAGS) to ccflags-y for compatibility.
#

-EXTRA_CFLAGS += -I$(src)/common/inc
-EXTRA_CFLAGS += -I$(src)
-EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
-EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.133.20\"
+ccflags-y += -I$(src)/common/inc
+ccflags-y += -I$(src)
+ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
+ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
+ccflags-y += -DNV_VERSION_STRING=\"575.64.03\"

 ifneq ($(SYSSRCHOST1X),)
-EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
+ccflags-y += -I$(SYSSRCHOST1X)
 endif

 # Some Android kernels prohibit driver use of filesystem functions like

@@ -99,57 +92,57 @@ endif
 PLATFORM_IS_ANDROID ?= 0

 ifeq ($(PLATFORM_IS_ANDROID),1)
-EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0
+ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0
 else
-EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1
+ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1
 endif

-EXTRA_CFLAGS += -Wno-unused-function
+ccflags-y += -Wno-unused-function

 ifneq ($(NV_BUILD_TYPE),debug)
-EXTRA_CFLAGS += -Wuninitialized
+ccflags-y += -Wuninitialized
 endif

-EXTRA_CFLAGS += -fno-strict-aliasing
+ccflags-y += -fno-strict-aliasing

 ifeq ($(ARCH),arm64)
-EXTRA_CFLAGS += -mstrict-align
+ccflags-y += -mstrict-align
 endif

 ifeq ($(NV_BUILD_TYPE),debug)
-EXTRA_CFLAGS += -g
+ccflags-y += -g
 endif

-EXTRA_CFLAGS += -ffreestanding
+ccflags-y += -ffreestanding

 ifeq ($(ARCH),arm64)
-EXTRA_CFLAGS += -mgeneral-regs-only -march=armv8-a
-EXTRA_CFLAGS += $(call cc-option,-mno-outline-atomics,)
+ccflags-y += -mgeneral-regs-only -march=armv8-a
+ccflags-y += $(call cc-option,-mno-outline-atomics,)
 endif

 ifeq ($(ARCH),x86_64)
-EXTRA_CFLAGS += -mno-red-zone -mcmodel=kernel
+ccflags-y += -mno-red-zone -mcmodel=kernel
 endif

 ifeq ($(ARCH),powerpc)
-EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
+ccflags-y += -mlittle-endian -mno-strict-align
 endif

-EXTRA_CFLAGS += -DNV_UVM_ENABLE
-EXTRA_CFLAGS += $(call cc-option,-Werror=undef,)
-EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
-EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER
+ccflags-y += -DNV_UVM_ENABLE
+ccflags-y += $(call cc-option,-Werror=undef,)
+ccflags-y += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
+ccflags-y += -DNV_KERNEL_INTERFACE_LAYER

 #
 # Detect SGI UV systems and apply system-specific optimizations.
 #

 ifneq ($(wildcard /proc/sgi_uv),)
-EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
+ccflags-y += -DNV_CONFIG_X86_UV
 endif

 ifdef VGX_FORCE_VFIO_PCI_CORE
-EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
+ccflags-y += -DNV_VGPU_FORCE_VFIO_PCI_CORE
 endif

 WARNINGS_AS_ERRORS ?=

@@ -183,7 +176,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \

 NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)

-NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
+NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(ccflags-y) -fno-pie
+NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
 NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
 NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
 NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
@@ -71,6 +71,31 @@ else
 CC ?= cc
 LD ?= ld
 OBJDUMP ?= objdump
+AWK ?= awk
+# Bake the following awk program in a string. The program is needed to add C++
+# to the languages excluded from BTF generation.
+#
+# Also, unconditionally return success (0) from the awk program, rather than
+# propagating pahole's return status (with 'exit system(pahole_cmd)'), to
+# work around a DW_TAG_rvalue_reference_type error in
+# kernel/nvidia-modeset.ko.
+#
+# BEGIN {
+#     pahole_cmd = "pahole"
+#     for (i = 1; i < ARGC; i++) {
+#         if (ARGV[i] ~ /--lang_exclude=/) {
+#             pahole_cmd = pahole_cmd sprintf(" %s,c++", ARGV[i])
+#         } else {
+#             pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i])
+#         }
+#     }
+#     system(pahole_cmd)
+# }
+PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); }
+# If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and
+# PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the
+# empty string to PAHOLE_VARIABLES.
+PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'")

 ifndef ARCH
 ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \

@@ -86,7 +111,7 @@ else
 ifneq ($(filter $(ARCH),i386 x86_64),)
   KERNEL_ARCH = x86
 else
-  ifeq ($(filter $(ARCH),arm64 powerpc),)
+  ifeq ($(filter $(ARCH),arm64 powerpc riscv),)
     $(error Unsupported architecture $(ARCH))
   endif
 endif

@@ -112,7 +137,8 @@ else

 .PHONY: modules module clean clean_conftest modules_install
 modules clean modules_install:
-	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" $(KBUILD_PARAMS) $@
+	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \
+	    $(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@
	@if [ "$@" = "modules" ]; then \
	    for module in $(NV_KERNEL_MODULES); do \
	        if [ -x split-object-file.sh ]; then \
kernel-open/common/inc/dce_rm_client_ipc.h (new file, 35 lines)

@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_DCE_CLIENT_IPC_H_
#define _OS_DCE_CLIENT_IPC_H_

// RM IPC Client Types

#define DCE_CLIENT_RM_IPC_TYPE_SYNC   0x0
#define DCE_CLIENT_RM_IPC_TYPE_EVENT  0x1
#define DCE_CLIENT_RM_IPC_TYPE_MAX    0x2

void dceclientHandleAsyncRpcCallback(NvU32 handle, NvU32 interfaceType,
                                     NvU32 msgLength, void *data,
                                     void *usrCtx);
#endif
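The header declares only the type constants and the callback prototype; registration and dispatch live elsewhere in the driver. As a rough sketch of how DCE_CLIENT_RM_IPC_TYPE_MAX bounds a message dispatch, a hypothetical implementation of the declared callback might look like the following — the dispatch logic is an assumption for illustration, not code from this release:

#include "dce_rm_client_ipc.h"

/* Hypothetical handler matching the prototype declared above. */
void dceclientHandleAsyncRpcCallback(NvU32 handle, NvU32 interfaceType,
                                     NvU32 msgLength, void *data,
                                     void *usrCtx)
{
    /* Reject anything outside the defined IPC type range. */
    if (interfaceType >= DCE_CLIENT_RM_IPC_TYPE_MAX)
        return;

    switch (interfaceType)
    {
        case DCE_CLIENT_RM_IPC_TYPE_SYNC:
            /* complete a waiting synchronous RPC */
            break;
        case DCE_CLIENT_RM_IPC_TYPE_EVENT:
            /* queue an asynchronous event notification */
            break;
    }
}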
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a

@@ -36,8 +36,7 @@
 #include "nv-timer.h"
 #include "nv-time.h"
 #include "nv-chardev-numbers.h"

-#define NV_KERNEL_NAME "Linux"
+#include "nv-platform.h"

 #ifndef AUTOCONF_INCLUDED
 #if defined(NV_GENERATED_AUTOCONF_H_PRESENT)

@@ -239,7 +238,7 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
 #undef NV_SET_PAGES_UC_PRESENT
 #endif

-#if !defined(NVCPU_AARCH64) && !defined(NVCPU_PPC64LE) && !defined(NVCPU_RISCV64)
+#if !defined(NVCPU_AARCH64) && !defined(NVCPU_RISCV64)
 #if !defined(NV_SET_MEMORY_UC_PRESENT) && !defined(NV_SET_PAGES_UC_PRESENT)
 #error "This driver requires the ability to change memory types!"
 #endif

@@ -403,7 +402,7 @@ typedef enum
     NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
 } nv_memory_type_t;

-#if defined(NVCPU_AARCH64) || defined(NVCPU_PPC64LE) || defined(NVCPU_RISCV64)
+#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
 #define NV_ALLOW_WRITE_COMBINING(mt) 1
 #elif defined(NVCPU_X86_64)
 #if defined(NV_ENABLE_PAT_SUPPORT)

@@ -461,10 +460,7 @@ static inline void *nv_vmalloc(unsigned long size)
 #else
     void *ptr = __vmalloc(size, GFP_KERNEL);
 #endif
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }

@@ -481,10 +477,7 @@ static inline void *nv_ioremap(NvU64 phys, NvU64 size)
 #else
     void *ptr = ioremap(phys, size);
 #endif
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }

@@ -500,29 +493,12 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
     ptr = ioremap_cache_shared(phys, size);
 #elif defined(NV_IOREMAP_CACHE_PRESENT)
     ptr = ioremap_cache(phys, size);
-#elif defined(NVCPU_PPC64LE)
-    //
-    // ioremap_cache() has been only implemented correctly for ppc64le with
-    // commit f855b2f544d6 in April 2017 (kernel 4.12+). Internally, the kernel
-    // does provide a default implementation of ioremap_cache() that would be
-    // incorrect for our use (creating an uncached mapping) before the
-    // referenced commit, but that implementation is not exported and the
-    // NV_IOREMAP_CACHE_PRESENT conftest doesn't pick it up, and we end up in
-    // this #elif branch.
-    //
-    // At the same time, ppc64le have supported ioremap_prot() since May 2011
-    // (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
-    // support on power.
-    //
-    ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
 #else
     return nv_ioremap(phys, size);
 #endif

-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);

     return ptr;
 }

@@ -537,10 +513,8 @@ static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
     return nv_ioremap_nocache(phys, size);
 #endif

-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);

     return ptr;
 }

@@ -560,22 +534,19 @@ static NvBool nv_numa_node_has_memory(int node_id)
 #define NV_KMALLOC(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_KERNEL); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }

 #define NV_KZALLOC(ptr, size) \
     { \
         (ptr) = kzalloc(size, NV_GFP_KERNEL); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }

 #define NV_KMALLOC_ATOMIC(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_ATOMIC); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }

 #if defined(__GFP_RETRY_MAYFAIL)

@@ -589,8 +560,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
 #define NV_KMALLOC_NO_OOM(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_NO_OOM); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }

 #define NV_KFREE(ptr, size) \
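With this change the if (ptr) guard is gone, so NV_MEMDBG_ADD is reached even when kmalloc returns NULL — which implies the bookkeeping macro now tolerates a NULL pointer. Callers still check the result themselves; a minimal hypothetical usage sketch (the variable name and size are illustrative only):

    NvU32 *table;

    NV_KMALLOC(table, 16 * sizeof(*table));
    if (table == NULL)
        return NV_ERR_NO_MEMORY;  /* allocation failure is still the caller's problem */

    /* ... use table ... */

    NV_KFREE(table, 16 * sizeof(*table));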
@@ -623,9 +593,9 @@ static inline pgprot_t nv_sme_clr(pgprot_t prot)
 #endif // __sme_clr
 }

-static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
+static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
 {
-    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
+    pgprot_t prot = __pgprot(pgprot_val(vm_prot));

 #if defined(pgprot_decrypted)
     return pgprot_decrypted(prot);

@@ -646,41 +616,6 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
 #endif
 #endif

-static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
-                             NvBool cached, NvBool unencrypted)
-{
-    void *ptr;
-    pgprot_t prot = PAGE_KERNEL;
-#if defined(NVCPU_X86_64)
-#if defined(PAGE_KERNEL_NOENC)
-    if (unencrypted)
-    {
-        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
-                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
-    }
-    else
-#endif
-    {
-        prot = cached ? PAGE_KERNEL : PAGE_KERNEL_NOCACHE;
-    }
-#elif defined(NVCPU_AARCH64)
-    prot = cached ? PAGE_KERNEL : NV_PGPROT_UNCACHED(PAGE_KERNEL);
-#endif
-    /* All memory cached in PPC64LE; can't honor 'cached' input. */
-    ptr = vmap(pages, page_count, VM_MAP, prot);
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
-    }
-    return (NvUPtr)ptr;
-}
-
-static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
-{
-    vunmap((void *)vaddr);
-    NV_MEMDBG_REMOVE((void *)vaddr, page_count * PAGE_SIZE);
-}
-
 #if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
 #define NV_NUM_PHYSPAGES get_num_physpages()
 #else

@@ -705,6 +640,47 @@ static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)

 #define NV_NUM_CPUS() num_possible_cpus()

+#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
+
+#if defined(NVCPU_X86_64) && \
+    NV_IS_EXPORT_SYMBOL_GPL_set_memory_encrypted && \
+    NV_IS_EXPORT_SYMBOL_GPL_set_memory_decrypted
+#undef NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 1
+#endif
+
+static inline void nv_set_memory_decrypted_zeroed(NvBool unencrypted,
+                                                  unsigned long virt_addr,
+                                                  int num_native_pages,
+                                                  size_t size)
+{
+    if (virt_addr == 0)
+        return;
+
+#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+    if (unencrypted)
+    {
+        set_memory_decrypted(virt_addr, num_native_pages);
+        memset((void *)virt_addr, 0, size);
+    }
+#endif
+}
+
+static inline void nv_set_memory_encrypted(NvBool unencrypted,
+                                           unsigned long virt_addr,
+                                           int num_native_pages)
+{
+    if (virt_addr == 0)
+        return;
+
+#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+    if (unencrypted)
+    {
+        set_memory_encrypted(virt_addr, num_native_pages);
+    }
+#endif
+}
+
 static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 {
 #if defined(NV_PHYS_TO_DMA_PRESENT)

@@ -885,94 +861,42 @@ typedef void irqreturn_t;
     (((addr) >> NV_RM_PAGE_SHIFT) == \
      (((addr) + (size) - 1) >> NV_RM_PAGE_SHIFT)))

-/*
- * The kernel may have a workaround for this, by providing a method to isolate
- * a single 4K page in a given mapping.
- */
-#if (PAGE_SIZE > NV_RM_PAGE_SIZE) && defined(NVCPU_PPC64LE) && defined(NV_PAGE_4K_PFN)
-#define NV_4K_PAGE_ISOLATION_PRESENT
-#define NV_4K_PAGE_ISOLATION_MMAP_ADDR(addr) \
-    ((NvP64)((void*)(((addr) >> NV_RM_PAGE_SHIFT) << PAGE_SHIFT)))
-#define NV_4K_PAGE_ISOLATION_MMAP_LEN(size) PAGE_SIZE
-#define NV_4K_PAGE_ISOLATION_ACCESS_START(addr) \
-    ((NvP64)((void*)((addr) & ~NV_RM_PAGE_MASK)))
-#define NV_4K_PAGE_ISOLATION_ACCESS_LEN(addr, size) \
-    ((((addr) & NV_RM_PAGE_MASK) + size + NV_RM_PAGE_MASK) & \
-     ~NV_RM_PAGE_MASK)
-#define NV_PROT_4K_PAGE_ISOLATION NV_PAGE_4K_PFN
-#endif

 static inline int nv_remap_page_range(struct vm_area_struct *vma,
     unsigned long virt_addr, NvU64 phys_addr, NvU64 size, pgprot_t prot)
 {
-    int ret = -1;
-
-#if defined(NV_4K_PAGE_ISOLATION_PRESENT) && defined(NV_PROT_4K_PAGE_ISOLATION)
-    if ((size == PAGE_SIZE) &&
-        ((pgprot_val(prot) & NV_PROT_4K_PAGE_ISOLATION) != 0))
-    {
-        /*
-         * remap_4k_pfn() hardcodes the length to a single OS page, and checks
-         * whether applying the page isolation workaround will cause PTE
-         * corruption (in which case it will fail, and this is an unsupported
-         * configuration).
-         */
-#if defined(NV_HASH__REMAP_4K_PFN_PRESENT)
-        ret = hash__remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
-#else
-        ret = remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
-#endif
-    }
-    else
-#endif
-    {
-        ret = remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
-                              prot);
-    }
-
-    return ret;
+    return remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
+                           prot);
 }

 static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
-    NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
+    NvU64 phys_addr, NvU64 size, NvU64 start)
 {
     int ret = -1;
 #if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
     ret = nv_remap_page_range(vma, start, phys_addr, size,
-                              nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
+                              nv_adjust_pgprot(vma->vm_page_prot));
 #else
     ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
-                             size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
+                             size, nv_adjust_pgprot(vma->vm_page_prot));
 #endif
     return ret;
 }

 static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
-    NvU64 virt_addr, NvU64 pfn, NvU32 extra_prot)
+    NvU64 virt_addr, NvU64 pfn)
 {
     /*
      * vm_insert_pfn{,_prot} replaced with vmf_insert_pfn{,_prot} in Linux 4.20
      */
 #if defined(NV_VMF_INSERT_PFN_PROT_PRESENT)
     return vmf_insert_pfn_prot(vma, virt_addr, pfn,
-                               __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
+                               __pgprot(pgprot_val(vma->vm_page_prot)));
 #else
     int ret = -EINVAL;
-    /*
-     * Only PPC64LE (NV_4K_PAGE_ISOLATION_PRESENT) requires extra_prot to be
-     * used when remapping.
-     *
-     * vm_insert_pfn_prot() was added in Linux 4.4, whereas POWER9 support
-     * was added in Linux 4.8.
-     *
-     * Rather than tampering with the vma to make use of extra_prot with
-     * vm_insert_pfn() on older kernels, for now, just fail in this case, as
-     * it's not expected to be used currently.
-     */
 #if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
     ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
-                             __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
-#elif !defined(NV_4K_PAGE_ISOLATION_PRESENT)
+                             __pgprot(pgprot_val(vma->vm_page_prot)));
+#else
     ret = vm_insert_pfn(vma, virt_addr, pfn);
 #endif
     switch (ret)

@@ -1158,7 +1082,6 @@ static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
 typedef struct nvidia_pte_s {
     NvU64           phys_addr;
     unsigned long   virt_addr;
-    NvU64           dma_addr;
 } nvidia_pte_t;

 #if defined(CONFIG_DMA_SHARED_BUFFER)

@@ -1199,6 +1122,7 @@ typedef struct nv_alloc_s {
     NvS32 node_id;   /* Node id for memory allocation when node is set in flags */
     void *import_priv;
     struct sg_table *import_sgt;
+    dma_addr_t dma_handle;   /* dma handle used by dma_alloc_coherent(), dma_free_coherent() */
 } nv_alloc_t;

 /**

@@ -1424,6 +1348,23 @@ struct os_wait_queue {
     struct completion q;
 };

+/*!
+ * @brief Mapping between clock names and clock handles.
+ *
+ * TEGRA_DISP_WHICH_CLK_MAX: maximum number of clocks
+ * defined in below enum.
+ *
+ * arch/nvalloc/unix/include/nv.h
+ * enum TEGRASOC_WHICH_CLK_MAX;
+ *
+ */
+typedef struct nvsoc_clks_s {
+    struct {
+        struct clk *handles;
+        const char *clkName;
+    } clk[TEGRASOC_WHICH_CLK_MAX];
+} nvsoc_clks_t;
+
 /*
  * To report error in msi/msix when unhandled count reaches a threshold
  */

@@ -1583,6 +1524,8 @@ typedef struct nv_linux_state_s {
     nv_acpi_t* nv_acpi_object;
 #endif

+    nvsoc_clks_t soc_clk_handles;
+
     /* Lock serializing ISRs for different SOC vectors */
     nv_spinlock_t soc_isr_lock;
     void *soc_bh_mutex;

@@ -1782,12 +1725,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
  */
 static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
 {
-#if !defined(NVCPU_PPC64LE)
     if (NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv))
     {
         return NV_ERR_GPU_IS_LOST;
     }
-#endif

     return NV_OK;
 }
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a

@@ -297,9 +297,21 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
 #endif
 }

+#define NV_CAN_CALL_VMA_START_WRITE 1
+
+#if !NV_CAN_CALL_VMA_START_WRITE
+/*
+ * Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write().
+ */
+void nv_vma_start_write(struct vm_area_struct *);
+#endif
+
 static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
 {
-#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
+#if !NV_CAN_CALL_VMA_START_WRITE
+    nv_vma_start_write(vma);
+    ACCESS_PRIVATE(vma, __vm_flags) |= flags;
+#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
     vm_flags_set(vma, flags);
 #else
     vma->vm_flags |= flags;

@@ -308,7 +320,10 @@ static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)

 static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
 {
-#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
+#if !NV_CAN_CALL_VMA_START_WRITE
+    nv_vma_start_write(vma);
+    ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
+#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
     vm_flags_clear(vma, flags);
 #else
     vma->vm_flags &= ~flags;

@@ -26,8 +26,7 @@

 #include "nv-linux.h"

-#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64) || \
-     defined(NVCPU_PPC64LE)) && \
+#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64)) && \
     (defined(CONFIG_PCI_MSI) || defined(CONFIG_PCI_USE_VECTOR))
 #define NV_LINUX_PCIE_MSI_SUPPORTED
 #endif
kernel-open/common/inc/nv-platform.h (new file, 36 lines)

@@ -0,0 +1,36 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NV_PLATFORM_H
#define NV_PLATFORM_H

#include "nv-linux.h"

irqreturn_t nvidia_isr (int, void *);
irqreturn_t nvidia_isr_kthread_bh (int, void *);

#define NV_SUPPORTS_PLATFORM_DEVICE 0

#define NV_SUPPORTS_PLATFORM_DISPLAY_DEVICE 0

#endif

@@ -41,7 +41,7 @@ void nv_procfs_remove_gpu (nv_linux_state_t *);

 int nvidia_mmap (struct file *, struct vm_area_struct *);
 int nvidia_mmap_helper (nv_state_t *, nv_linux_file_private_t *, nvidia_stack_t *, struct vm_area_struct *, void *);
-int nv_encode_caching (pgprot_t *, NvU32, NvU32);
+int nv_encode_caching (pgprot_t *, NvU32, nv_memory_type_t);
 void nv_revoke_gpu_mappings_locked(nv_state_t *);

 NvUPtr nv_vm_map_pages (struct page **, NvU32, NvBool, NvBool);
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2017-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a

@@ -63,4 +63,13 @@ static inline void nv_timer_setup(struct nv_timer *nv_timer,
 #endif
 }

+static inline void nv_timer_delete_sync(struct timer_list *timer)
+{
+#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
+    timer_delete_sync(timer);
+#else
+    del_timer_sync(timer);
+#endif
+}
+
 #endif // __NV_TIMER_H__
@@ -168,6 +168,15 @@ typedef enum _TEGRASOC_WHICH_CLK
     TEGRASOC_WHICH_CLK_PLLA_DISP,
     TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
     TEGRASOC_WHICH_CLK_PLLA,
+    TEGRASOC_WHICH_CLK_EMC,
+    TEGRASOC_WHICH_CLK_GPU_FIRST,
+    TEGRASOC_WHICH_CLK_GPU_SYS = TEGRASOC_WHICH_CLK_GPU_FIRST,
+    TEGRASOC_WHICH_CLK_GPU_NVD,
+    TEGRASOC_WHICH_CLK_GPU_UPROC,
+    TEGRASOC_WHICH_CLK_GPU_GPC0,
+    TEGRASOC_WHICH_CLK_GPU_GPC1,
+    TEGRASOC_WHICH_CLK_GPU_GPC2,
+    TEGRASOC_WHICH_CLK_GPU_LAST = TEGRASOC_WHICH_CLK_GPU_GPC2,
     TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
 } TEGRASOC_WHICH_CLK;

@@ -283,7 +292,6 @@ typedef struct nv_usermap_access_params_s
     MemoryArea memArea;
     NvU64  access_start;
     NvU64  access_size;
-    NvU64  remap_prot_extra;
     NvBool contig;
     NvU32  caching;
 } nv_usermap_access_params_t;

@@ -299,7 +307,6 @@ typedef struct nv_alloc_mapping_context_s {
     MemoryArea memArea;
     NvU64  access_start;
     NvU64  access_size;
-    NvU64  remap_prot_extra;
     NvU32  prot;
     NvBool valid;
     NvU32  caching;

@@ -567,24 +574,24 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
  * flags
  */

-#define NV_FLAG_OPEN                   0x0001
-#define NV_FLAG_EXCLUDE                0x0002
-#define NV_FLAG_CONTROL                0x0004
-// Unused                              0x0008
-#define NV_FLAG_SOC_DISPLAY            0x0010
-#define NV_FLAG_USES_MSI               0x0020
-#define NV_FLAG_USES_MSIX              0x0040
-#define NV_FLAG_PASSTHRU               0x0080
-#define NV_FLAG_SUSPENDED              0x0100
-#define NV_FLAG_SOC_IGPU               0x0200
+#define NV_FLAG_OPEN                    0x0001
+#define NV_FLAG_EXCLUDE                 0x0002
+#define NV_FLAG_CONTROL                 0x0004
+// Unused                               0x0008
+#define NV_FLAG_SOC_DISPLAY             0x0010
+#define NV_FLAG_USES_MSI                0x0020
+#define NV_FLAG_USES_MSIX               0x0040
+#define NV_FLAG_PASSTHRU                0x0080
+#define NV_FLAG_SUSPENDED               0x0100
+#define NV_FLAG_SOC_IGPU                0x0200
 /* To be set when an FLR needs to be triggered after device shut down. */
-#define NV_FLAG_TRIGGER_FLR            0x0400
-#define NV_FLAG_PERSISTENT_SW_STATE    0x0800
-#define NV_FLAG_IN_RECOVERY            0x1000
-// Unused                              0x2000
-#define NV_FLAG_UNBIND_LOCK            0x4000
+#define NV_FLAG_TRIGGER_FLR             0x0400
+#define NV_FLAG_PERSISTENT_SW_STATE     0x0800
+#define NV_FLAG_IN_RECOVERY             0x1000
+#define NV_FLAG_PCI_REMOVE_IN_PROGRESS  0x2000
+#define NV_FLAG_UNBIND_LOCK             0x4000
 /* To be set when GPU is not present on the bus, to help device teardown */
-#define NV_FLAG_IN_SURPRISE_REMOVAL    0x8000
+#define NV_FLAG_IN_SURPRISE_REMOVAL     0x8000

 typedef enum
 {

@@ -798,7 +805,7 @@ NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, Nv
 NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
 NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);

-NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
+NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **, NvBool);
 void NV_API_CALL nv_unregister_user_pages (nv_state_t *, NvU64, void **, void **);

 NV_STATUS NV_API_CALL nv_register_peer_io_mem (nv_state_t *, NvU64 *, NvU64, void **);

@@ -918,6 +925,15 @@ NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);

 void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);

+NV_STATUS NV_API_CALL nv_clk_get_handles (nv_state_t *);
+void NV_API_CALL nv_clk_clear_handles (nv_state_t *);
+NV_STATUS NV_API_CALL nv_enable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
+void NV_API_CALL nv_disable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
+NV_STATUS NV_API_CALL nv_get_curr_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_get_max_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_get_min_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_set_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32);
+
 /*
  * ---------------------------------------------------------------------------
  *

@@ -1043,6 +1059,9 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);

 NvBool NV_API_CALL rm_is_altstack_in_use(void);

+void NV_API_CALL rm_notify_gpu_addition(nvidia_stack_t *, nv_state_t *);
+void NV_API_CALL rm_notify_gpu_removal(nvidia_stack_t *, nv_state_t *);
+
 /* vGPU VFIO specific functions */
 NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
                                              NvU32 *, NvU32 *, NvU32);

@@ -1057,7 +1076,7 @@ NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *,
 NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
 NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);

-NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
+NV_STATUS NV_API_CALL nv_check_usermap_access_params(nv_state_t*, const nv_usermap_access_params_t*);
 nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
 void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
kernel-open/common/inc/nv_common_utils.h (new file, 120 lines)

@@ -0,0 +1,120 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NV_COMMON_UTILS_H__
#define __NV_COMMON_UTILS_H__

#include "nvtypes.h"
#include "nvmisc.h"

#if !defined(TRUE)
#define TRUE NV_TRUE
#endif

#if !defined(FALSE)
#define FALSE NV_FALSE
#endif

#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)

/* Get the length of a statically-sized array. */
#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))

#define NV_INVALID_HEAD 0xFFFFFFFF

#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)

#if !defined(NV_MIN)
# define NV_MIN(a,b) (((a)<(b))?(a):(b))
#endif

#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)

#if !defined(NV_MAX)
# define NV_MAX(a,b) (((a)>(b))?(a):(b))
#endif

#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)

static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
{
    if (val < min) {
        return min;
    }
    if (val > max) {
        return max;
    }
    return val;
}

#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))

/*
 * Macros used for computing palette entries:
 *
 * NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
 * of source_size bits into a value of result_size bits by shifting
 * the source value into the high bits and replicating the high bits
 * of the value into the low bits of the result.
 *
 * PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
 * that has w bits to an appropriate entry in a LUT of 256 entries.
 */
static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
                                              int source_size,
                                              int result_size)
{
    return (val << (result_size - source_size)) |
           (val >> ((source_size << 1) - result_size));
}

static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
{
    return NV_UNDER_REPLICATE(val, depth, 8);
}
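As a worked example of the replication arithmetic (the input value is chosen purely for illustration):

/* Expanding the 5-bit component 0x15 (10101b) to 8 bits:
 *
 *   NV_UNDER_REPLICATE(0x15, 5, 8)
 *     = (0x15 << (8 - 5)) | (0x15 >> ((5 << 1) - 8))
 *     = (0x15 << 3) | (0x15 >> 2)
 *     = 0xA8 | 0x05
 *     = 0xAD                    -- 10101101b: 10101 with its top bits 101
 *                                  replicated into the low bits
 *
 * PALETTE_DEPTH_SHIFT(0x15, 5) is the same call with result_size fixed
 * at 8, so it also yields 0xAD.
 */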
/*
 * Use __builtin_ffs where it is supported, or provide an equivalent
 * implementation for platforms like riscv where it is not.
 */
#if defined(__GNUC__) && !NVCPU_IS_RISCV64
static inline int nv_ffs(int x)
{
    return __builtin_ffs(x);
}
#else
static inline int nv_ffs(int x)
{
    if (x == 0)
        return 0;

    LOWESTBITIDX_32(x);

    return 1 + x;
}
#endif
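Both variants follow the classic ffs() convention: a 1-based index of the lowest set bit, with 0 reserved for a zero input. A few illustrative values (not part of the header itself):

/* nv_ffs(0x0) == 0   -- no bit set
 * nv_ffs(0x1) == 1   -- bit 0
 * nv_ffs(0x8) == 4   -- bit 3
 * nv_ffs(0xC) == 3   -- lowest set bit is bit 2
 */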
#endif /* __NV_COMMON_UTILS_H__ */

kernel-open/common/inc/nv_dpy_id.h (new file, 370 lines)

@@ -0,0 +1,370 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2010-2014 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/*
 * This header file defines the types NVDpyId and NVDpyIdList, as well
 * as inline functions to manipulate these types. NVDpyId and
 * NVDpyIdList should be treated as opaque by includers of this header
 * file.
 */

#ifndef __NV_DPY_ID_H__
#define __NV_DPY_ID_H__

#include "nvtypes.h"
#include "nvmisc.h"
#include "nv_common_utils.h"
#include <nvlimits.h> /* NV_MAX_SUBDEVICES */

typedef struct {
    NvU32 opaqueDpyId;
} NVDpyId;

typedef struct {
    NvU32 opaqueDpyIdList;
} NVDpyIdList;

#define NV_DPY_ID_MAX_SUBDEVICES NV_MAX_SUBDEVICES
#define NV_DPY_ID_MAX_DPYS_IN_LIST 32

/*
 * For use in combination with nvDpyIdToPrintFormat(); e.g.,
 *
 *   printf("dpy id: " NV_DPY_ID_PRINT_FORMAT "\n",
 *          nvDpyIdToPrintFormat(dpyId));
 *
 * The includer should not make assumptions about the return type of
 * nvDpyIdToPrintFormat().
 */
#define NV_DPY_ID_PRINT_FORMAT "0x%08x"

/* functions to return an invalid DpyId and empty DpyIdList */

static inline NVDpyId nvInvalidDpyId(void)
{
    NVDpyId dpyId = { 0 };
    return dpyId;
}

static inline NVDpyIdList nvEmptyDpyIdList(void)
{
    NVDpyIdList dpyIdList = { 0 };
    return dpyIdList;
}

static inline NVDpyIdList nvAllDpyIdList(void)
{
    NVDpyIdList dpyIdList = { ~0U };
    return dpyIdList;
}

static inline void
nvEmptyDpyIdListSubDeviceArray(NVDpyIdList dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
{
    int dispIndex;
    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
        dpyIdList[dispIndex] = nvEmptyDpyIdList();
    }
}

/* set operations on DpyIds and DpyIdLists: Add, Subtract, Intersect, Xor */

static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvAddDpyIdToDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList |
                                   dpyId.opaqueDpyId;
    return tmpDpyIdList;
}

/* Passing an invalid display ID makes this function return an empty list. */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvAddDpyIdToEmptyDpyIdList(NVDpyId dpyId)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyId.opaqueDpyId;
    return tmpDpyIdList;
}

static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvAddDpyIdListToDpyIdList(NVDpyIdList dpyIdListA,
                                      NVDpyIdList dpyIdListB)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdListB.opaqueDpyIdList |
                                   dpyIdListA.opaqueDpyIdList;
    return tmpDpyIdList;
}

/* Returns: dpyIdList - dpyId */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvDpyIdListMinusDpyId(NVDpyIdList dpyIdList, NVDpyId dpyId)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
                                   (~dpyId.opaqueDpyId);
    return tmpDpyIdList;
}

/* Returns: dpyIdListA - dpyIdListB */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvDpyIdListMinusDpyIdList(NVDpyIdList dpyIdListA,
                                      NVDpyIdList dpyIdListB)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
                                   (~dpyIdListB.opaqueDpyIdList);
    return tmpDpyIdList;
}

static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvIntersectDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
                                   dpyId.opaqueDpyId;
    return tmpDpyIdList;
}

static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvIntersectDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
                                             NVDpyIdList dpyIdListB)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
                                   dpyIdListB.opaqueDpyIdList;
    return tmpDpyIdList;
}

static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvXorDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList ^
                                   dpyId.opaqueDpyId;
    return tmpDpyIdList;
}

static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvXorDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
                                       NVDpyIdList dpyIdListB)
{
    NVDpyIdList tmpDpyIdList;
    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList ^
                                   dpyIdListB.opaqueDpyIdList;
    return tmpDpyIdList;
}


/* boolean checks */

static inline NvBool nvDpyIdIsInDpyIdList(NVDpyId dpyId,
                                          NVDpyIdList dpyIdList)
{
    return !!(dpyIdList.opaqueDpyIdList & dpyId.opaqueDpyId);
}

static inline NvBool nvDpyIdIsInvalid(NVDpyId dpyId)
{
    return (dpyId.opaqueDpyId == 0);
}

static inline NvBool nvDpyIdListIsEmpty(NVDpyIdList dpyIdList)
{
    return (dpyIdList.opaqueDpyIdList == 0);
}

static inline NvBool
nvDpyIdListSubDeviceArrayIsEmpty(NVDpyIdList
                                 dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
{
    int dispIndex;
    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
        if (!nvDpyIdListIsEmpty(dpyIdList[dispIndex])) {
            return NV_FALSE;
        }
    }
    return NV_TRUE;
}


static inline NvBool nvDpyIdsAreEqual(NVDpyId dpyIdA, NVDpyId dpyIdB)
{
    return (dpyIdA.opaqueDpyId == dpyIdB.opaqueDpyId);
}

static inline NvBool nvDpyIdListsAreEqual(NVDpyIdList dpyIdListA,
                                          NVDpyIdList dpyIdListB)
{
    return (dpyIdListA.opaqueDpyIdList == dpyIdListB.opaqueDpyIdList);
}

static inline NvBool nvDpyIdListIsASubSetofDpyIdList(NVDpyIdList dpyIdListA,
                                                     NVDpyIdList dpyIdListB)
{
    NVDpyIdList intersectedDpyIdList =
        nvIntersectDpyIdListAndDpyIdList(dpyIdListA, dpyIdListB);

    return nvDpyIdListsAreEqual(intersectedDpyIdList, dpyIdListA);
}


/*
 * retrieve the individual dpyIds from dpyIdList; if dpyId is invalid,
 * start at the beginning of the list; otherwise, start at the dpyId
 * after the specified dpyId
 */

static inline __attribute__ ((warn_unused_result))
NVDpyId nvNextDpyIdInDpyIdListUnsorted(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
    if (nvDpyIdIsInvalid(dpyId)) {
        dpyId.opaqueDpyId = 1;
    } else {
        dpyId.opaqueDpyId <<= 1;
    }

    while (dpyId.opaqueDpyId) {

        if (nvDpyIdIsInDpyIdList(dpyId, dpyIdList)) {
            return dpyId;
        }

        dpyId.opaqueDpyId <<= 1;
    }

    /* no dpyIds left in dpyIdlist; return the invalid dpyId */

    return nvInvalidDpyId();
}

#define FOR_ALL_DPY_IDS(_dpyId, _dpyIdList)                            \
    for ((_dpyId) = nvNextDpyIdInDpyIdListUnsorted(nvInvalidDpyId(),   \
                                                   (_dpyIdList));      \
         !nvDpyIdIsInvalid(_dpyId);                                    \
         (_dpyId) = nvNextDpyIdInDpyIdListUnsorted((_dpyId),           \
                                                   (_dpyIdList)))
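Since a list is just a 32-bit mask with one bit per display, typical usage composes IDs into a list and walks it with the iterator macro. A hypothetical sketch using only functions from this header (the 0x1/0x4 values and the printf are illustrative):

    NVDpyIdList list = nvEmptyDpyIdList();
    NVDpyId dpyId;

    /* Each ID occupies one bit; 0x1 and 0x4 are bits 0 and 2. */
    list = nvAddDpyIdToDpyIdList(nvNvU32ToDpyId(0x1), list);
    list = nvAddDpyIdToDpyIdList(nvNvU32ToDpyId(0x4), list);

    /* nvCountDpyIdsInDpyIdList(list) == 2 */
    FOR_ALL_DPY_IDS(dpyId, list) {
        printf("dpy id: " NV_DPY_ID_PRINT_FORMAT "\n",
               nvDpyIdToPrintFormat(dpyId));
    }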
/* report how many dpyIds are in the dpyIdList */

static inline int nvCountDpyIdsInDpyIdList(NVDpyIdList dpyIdList)
{
    return nvPopCount32(dpyIdList.opaqueDpyIdList);
}

static inline int
nvCountDpyIdsInDpyIdListSubDeviceArray(NVDpyIdList
                                       dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
{
    int dispIndex, n = 0;

    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
        n += nvCountDpyIdsInDpyIdList(dpyIdList[dispIndex]);
    }

    return n;
}

/* convert between dpyId/dpyIdList and NV-CONTROL values */

static inline int nvDpyIdToNvControlVal(NVDpyId dpyId)
{
    return (int) dpyId.opaqueDpyId;
}

static inline int nvDpyIdListToNvControlVal(NVDpyIdList dpyIdList)
{
    return (int) dpyIdList.opaqueDpyIdList;
}

static inline NVDpyId nvNvControlValToDpyId(int val)
{
    NVDpyId dpyId;
    dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
    return dpyId;
}

static inline NVDpyIdList nvNvControlValToDpyIdList(int val)
{
    NVDpyIdList dpyIdList;
    dpyIdList.opaqueDpyIdList = val;
    return dpyIdList;
}


/* convert between dpyId and NvU32 */

static inline NVDpyId nvNvU32ToDpyId(NvU32 val)
{
    NVDpyId dpyId;
    dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
    return dpyId;
}

static inline NVDpyIdList nvNvU32ToDpyIdList(NvU32 val)
{
    NVDpyIdList dpyIdList;
    dpyIdList.opaqueDpyIdList = val;
    return dpyIdList;
}

static inline NvU32 nvDpyIdToNvU32(NVDpyId dpyId)
{
    return dpyId.opaqueDpyId;
}

static inline NvU32 nvDpyIdListToNvU32(NVDpyIdList dpyIdList)
{
    return dpyIdList.opaqueDpyIdList;
}

/* Return the bit position of dpyId: a number in the range [0..31]. */
static inline NvU32 nvDpyIdToIndex(NVDpyId dpyId)
{
    return nv_ffs(dpyId.opaqueDpyId) - 1;
}

/* Return a display ID that is not in the list passed in. */

static inline NVDpyId nvNewDpyId(NVDpyIdList excludeList)
{
    NVDpyId dpyId;
    if (~excludeList.opaqueDpyIdList == 0) {
        return nvInvalidDpyId();
    }
    dpyId.opaqueDpyId =
        1U << (nv_ffs(~excludeList.opaqueDpyIdList) - 1);
    return dpyId;
}

/* See comment for NV_DPY_ID_PRINT_FORMAT. */
static inline NvU32 nvDpyIdToPrintFormat(NVDpyId dpyId)
{
    return nvDpyIdToNvU32(dpyId);
}

/* Prevent usage of opaque values. */
#define opaqueDpyId     __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
#define opaqueDpyIdList __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H

#endif /* __NV_DPY_ID_H__ */
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a

@@ -20,8 +20,8 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#ifndef __NV_SMG_H__
-#define __NV_SMG_H__
+#ifndef __NV_MIG_TYPES_H__
+#define __NV_MIG_TYPES_H__

 #ifdef __cplusplus
 extern "C" {

@@ -29,25 +29,12 @@ extern "C" {

 #include "nvtypes.h"

-/*
- * The simplest required abstraction for accessing RM independent of the
- * calling component which may be a kernel module or userspace driver.
- */
-typedef NvU32 (*NVSubdevSMGRMControl) (void *ctx, NvU32 object, NvU32 cmd, void *params, NvU32 paramsSize);
-typedef NvU32 (*NVSubdevSMGRMAlloc)   (void *ctx, NvU32 parent, NvU32 object, NvU32 cls, void *allocParams);
-typedef NvU32 (*NVSubdevSMGRMFree)    (void *ctx, NvU32 parent, NvU32 object);
+typedef NvU32 MIGDeviceId;

-NvBool NVSubdevSMGSetPartition(void *ctx,
-                               NvU32 subdevHandle,
-                               const char *computeInstUuid,
-                               NvU32 gpuInstSubscriptionHdl,
-                               NvU32 computeInstSubscriptionHdl,
-                               NVSubdevSMGRMControl rmControl,
-                               NVSubdevSMGRMAlloc rmAlloc,
-                               NVSubdevSMGRMFree rmFree);
+#define NO_MIG_DEVICE 0L

 #ifdef __cplusplus
 }
 #endif

-#endif /* __NV_SMG_H__ */
+#endif /* __NV_MIG_TYPES_H__ */
@@ -660,14 +660,20 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
     RM will propagate the update to all channels using the provided VA space.
     All channels must be idle when this call is made.

+    If the pageDirectory is in system memory then a CPU physical address must be
+    provided. RM will establish and manage the DMA mapping for the
+    pageDirectory.
+
     Arguments:
         vaSpace[IN]         - VASpace Object
-        physAddress[IN]     - Physical address of new page directory
+        physAddress[IN]     - Physical address of new page directory. If
+                              !bVidMemAperture this is a CPU physical address.
         numEntries[IN]      - Number of entries including previous PDE which will be copied
         bVidMemAperture[IN] - If set pageDirectory will reside in VidMem aperture else sysmem
         pasid[IN]           - PASID (Process Address Space IDentifier) of the process
                               corresponding to the VA space. Ignored unless the VA space
                               object has ATS enabled.
+        dmaAddress[OUT]     - DMA mapping created for physAddress.

     Error codes:
         NV_ERR_GENERIC

@@ -675,7 +681,8 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
  */
 NV_STATUS nvUvmInterfaceSetPageDirectory(uvmGpuAddressSpaceHandle vaSpace,
                                          NvU64 physAddress, unsigned numEntries,
-                                         NvBool bVidMemAperture, NvU32 pasid);
+                                         NvBool bVidMemAperture, NvU32 pasid,
+                                         NvU64 *dmaAddress);
|
||||
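
A hedged sketch of a caller adapting to the dmaAddress out-parameter added above (the vaSpace handle, physical address, and other inputs are placeholders; error handling is elided):

    NvU64 dmaAddress = 0;
    NV_STATUS status;

    /* Sysmem page directory: pass the CPU physical address and let RM
     * establish the DMA mapping, returned through dmaAddress. */
    status = nvUvmInterfaceSetPageDirectory(vaSpace,
                                            pageDirPhysAddr, /* hypothetical */
                                            numEntries,
                                            NV_FALSE,        /* sysmem */
                                            pasid,
                                            &dmaAddress);
    if (status == NV_OK) {
        /* Program channels using dmaAddress rather than pageDirPhysAddr. */
    }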

/*******************************************************************************
    nvUvmInterfaceUnsetPageDirectory
@@ -1862,5 +1869,4 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
                                         UvmCslOperation operation,
                                         NvU32 bufferSize);

#endif // _NV_UVM_INTERFACE_H_

@@ -268,6 +268,7 @@ typedef struct UvmGpuChannelInfo_tag

    // The errorNotifier is filled out when the channel hits an RC error.
    NvNotification *errorNotifier;

    NvNotification *keyRotationNotifier;

    NvU32 hwRunlistId;
@@ -297,6 +298,7 @@ typedef struct UvmGpuChannelInfo_tag
    NvU64 gpFifoGpuVa;
    NvU64 gpPutGpuVa;
    NvU64 gpGetGpuVa;

    // GPU VA of the work submission offset is needed in Confidential Computing
    // so CE channels can ring the doorbell of other channels as required for
    // WLC/LCIC work submission
@@ -374,6 +376,9 @@ typedef struct
    // True if the CE can be used for P2P transactions
    NvBool p2p:1;

    // True if the CE supports encryption
    NvBool secure:1;

    // Mask of physical CEs assigned to this LCE
    //
    // The value returned by RM for this field may change when a GPU is
@@ -1007,17 +1012,17 @@ typedef struct UvmGpuFaultInfo_tag
    NvU32 replayableFaultMask;

    // Fault buffer CPU mapping
    void* bufferAddress;
    //
    // When Confidential Computing is disabled, the mapping points to the
    // actual HW fault buffer.
    //
    // When Confidential Computing is enabled, the mapping points to a
    // copy of the HW fault buffer. This "shadow buffer" is maintained
    // by GSP-RM.
    void* bufferAddress;

    // Size, in bytes, of the fault buffer pointed to by bufferAddress.
    NvU32 bufferSize;

    // Mapping pointing to the start of the fault buffer metadata containing
    // a 16-byte authentication tag and a valid byte. Always NULL when
    // Confidential Computing is disabled.
37
kernel-open/common/inc/nvi2c.h
Normal file
@@ -0,0 +1,37 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _NV_I2C_H_
#define _NV_I2C_H_

#define NV_I2C_MSG_WR 0x0000
#define NV_I2C_MSG_RD 0x0001

typedef struct nv_i2c_msg_s
{
    NvU16 addr;
    NvU16 flags;
    NvU16 len;
    NvU8* buf;
} nv_i2c_msg_t;

#endif
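
A hedged sketch of how a caller might fill two of these messages for a typical register read (a write of the register offset followed by a read); the device address and the consumer of the array are assumptions, since this header only defines the types:

    /* Sketch: read one byte from register 0x10 of a device at address 0x50. */
    NvU8 reg = 0x10;
    NvU8 val = 0;
    nv_i2c_msg_t msgs[2] = {
        { .addr = 0x50, .flags = NV_I2C_MSG_WR, .len = 1, .buf = &reg },
        { .addr = 0x50, .flags = NV_I2C_MSG_RD, .len = 1, .buf = &val },
    };
    /* msgs[] would then be handed to whatever transfer routine consumes
     * nv_i2c_msg_t; none is declared in this header. */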
96
kernel-open/common/inc/nvimpshared.h
Normal file
@@ -0,0 +1,96 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/******************************************************************************\
*                                                                              *
* Description:                                                                 *
*   Accommodates sharing of IMP-related structures between kernel interface   *
*   files and core RM.                                                         *
*                                                                              *
\******************************************************************************/

#pragma once

#include <nvtypes.h>
#if defined(_MSC_VER)
#pragma warning(disable:4324)
#endif

//
// This file was generated with FINN, an NVIDIA coding tool.
// Source file: nvimpshared.finn
//

//
// There are only a small number of discrete dramclk frequencies available on
// the system. This structure contains IMP-relevant information associated
// with a specific dramclk frequency.
//
typedef struct DRAM_CLK_INSTANCE {
    NvU32 dram_clk_freq_khz;

    NvU32 mchub_clk_khz;

    NvU32 mc_clk_khz;

    NvU32 max_iso_bw_kbps;

    //
    // switch_latency_ns is the maximum time required to switch the dramclk
    // frequency to the frequency specified in dram_clk_freq_khz.
    //
    NvU32 switch_latency_ns;
} DRAM_CLK_INSTANCE;

//
// This table is used to collect information from other modules that is needed
// for RM IMP calculations. (Used on Tegra only.)
//
typedef struct TEGRA_IMP_IMPORT_DATA {
    //
    // max_iso_bw_kbps stores the maximum possible ISO bandwidth available to
    // display, assuming display is the only active ISO client. (Note that ISO
    // bandwidth will typically be allocated to multiple clients, so display
    // will generally not have access to the maximum possible bandwidth.)
    //
    NvU32 max_iso_bw_kbps;

    // On Orin, each dram channel is 16 bits wide.
    NvU32 num_dram_channels;

    //
    // dram_clk_instance stores entries for all possible dramclk frequencies,
    // sorted by dramclk frequency in increasing order.
    //
    // "24" is expected to be larger than the actual number of required entries
    // (which is provided by a BPMP API), but it can be increased if necessary.
    //
    // num_dram_clk_entries is filled in with the actual number of distinct
    // dramclk entries.
    //
    NvU32 num_dram_clk_entries;
    DRAM_CLK_INSTANCE dram_clk_instance[24];
} TEGRA_IMP_IMPORT_DATA;
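
Since the table above is sorted by frequency in increasing order, a consumer can pick the slowest dramclk that still satisfies a bandwidth requirement with a simple forward scan. A minimal sketch (the required-bandwidth value is a placeholder):

    /* Sketch: find the lowest dramclk meeting a hypothetical ISO demand. */
    static const DRAM_CLK_INSTANCE *
    pick_dram_clk(const TEGRA_IMP_IMPORT_DATA *imp, NvU32 required_iso_bw_kbps)
    {
        NvU32 i;
        for (i = 0; i < imp->num_dram_clk_entries; i++) {
            if (imp->dram_clk_instance[i].max_iso_bw_kbps >= required_iso_bw_kbps) {
                return &imp->dram_clk_instance[i];
            }
        }
        return NULL; /* No frequency can satisfy the demand. */
    }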
@@ -640,22 +640,28 @@ enum NvKmsInputColorRange {
     * If DEFAULT is provided, the driver will assume full range for RGB formats
     * and limited range for YUV formats.
     */
    NVKMS_INPUT_COLORRANGE_DEFAULT = 0,
    NVKMS_INPUT_COLOR_RANGE_DEFAULT = 0,

    NVKMS_INPUT_COLORRANGE_LIMITED = 1,
    NVKMS_INPUT_COLOR_RANGE_LIMITED = 1,

    NVKMS_INPUT_COLORRANGE_FULL = 2,
    NVKMS_INPUT_COLOR_RANGE_FULL = 2,
};

enum NvKmsInputColorSpace {
    /* Unknown colorspace; no de-gamma will be applied */
    NVKMS_INPUT_COLORSPACE_NONE = 0,
    /* Unknown colorspace */
    NVKMS_INPUT_COLOR_SPACE_NONE = 0,

    /* Linear, Rec.709 [-0.5, 7.5) */
    NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
    NVKMS_INPUT_COLOR_SPACE_BT601 = 1,
    NVKMS_INPUT_COLOR_SPACE_BT709 = 2,
    NVKMS_INPUT_COLOR_SPACE_BT2020 = 3,
    NVKMS_INPUT_COLOR_SPACE_BT2100 = NVKMS_INPUT_COLOR_SPACE_BT2020,

    /* PQ, Rec.2020 unity */
    NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
    NVKMS_INPUT_COLOR_SPACE_SCRGB = 4
};

enum NvKmsInputTf {
    NVKMS_INPUT_TF_LINEAR = 0,
    NVKMS_INPUT_TF_PQ = 1
};

enum NvKmsOutputColorimetry {

@@ -24,8 +24,10 @@
#if !defined(__NVKMS_KAPI_H__)

#include "nvtypes.h"
#include "nv_mig_types.h"

#include "nv-gpu-info.h"
#include "nv_dpy_id.h"
#include "nvkms-api-types.h"
#include "nvkms-format.h"

@@ -173,12 +175,18 @@ struct NvKmsKapiDeviceResourcesInfo {
        NvBool supportsSyncpts;

        NvBool requiresVrrSemaphores;

        NvBool supportsInputColorRange;
        NvBool supportsInputColorSpace;
    } caps;

    NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
    NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];

    struct NvKmsKapiLutCaps lutCaps;

    NvU64 vtFbBaseAddress;
    NvU64 vtFbSize;
};

#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -204,6 +212,7 @@ struct NvKmsKapiConnectorInfo {
    NvU32 numIncompatibleConnectors;
    NvKmsKapiConnector incompatibleConnectorHandles[NVKMS_KAPI_MAX_CONNECTORS];

    NVDpyIdList dynamicDpyIdList;
};

struct NvKmsKapiStaticDisplayInfo {
@@ -222,6 +231,8 @@ struct NvKmsKapiStaticDisplayInfo {
    NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];

    NvU32 headMask;

    NvBool isDpMST;
};

struct NvKmsKapiSyncParams {
@@ -260,7 +271,8 @@ struct NvKmsKapiLayerConfig {
        NvBool enabled;
    } hdrMetadata;

    enum NvKmsOutputTf tf;
    enum NvKmsInputTf inputTf;
    enum NvKmsOutputTf outputTf;

    NvU8 minPresentInterval;
    NvBool tearing;
@@ -272,6 +284,7 @@ struct NvKmsKapiLayerConfig {
    NvU16 dstWidth, dstHeight;

    enum NvKmsInputColorSpace inputColorSpace;
    enum NvKmsInputColorRange inputColorRange;

    struct {
        NvBool enabled;
@@ -315,7 +328,10 @@ struct NvKmsKapiLayerRequestedConfig {
        NvBool dstXYChanged : 1;
        NvBool dstWHChanged : 1;
        NvBool cscChanged : 1;
        NvBool tfChanged : 1;
        NvBool inputTfChanged : 1;
        NvBool outputTfChanged : 1;
        NvBool inputColorSpaceChanged : 1;
        NvBool inputColorRangeChanged : 1;
        NvBool hdrMetadataChanged : 1;
        NvBool matrixOverridesChanged : 1;
        NvBool ilutChanged : 1;
@@ -481,6 +497,8 @@ struct NvKmsKapiEvent {
struct NvKmsKapiAllocateDeviceParams {
    /* [IN] GPU ID obtained from enumerateGpus() */
    NvU32 gpuId;
    /* [IN] MIG device if requested */
    MIGDeviceId migDevice;

    /* [IN] Private data of device allocator */
    void *privateData;
@@ -563,6 +581,11 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {

typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);

struct NvKmsKapiGpuInfo {
    nv_gpu_info_t gpuInfo;
    MIGDeviceId migDevice;
};

struct NvKmsKapiFunctionsTable {

    /*!
@@ -586,7 +609,7 @@ struct NvKmsKapiFunctionsTable {
     *
     * \return Count of enumerated gpus.
     */
    NvU32 (*enumerateGpus)(nv_gpu_info_t *gpuInfo);
    NvU32 (*enumerateGpus)(struct NvKmsKapiGpuInfo *kapiGpuInfo);

    /*!
     * Allocate an NVK device using which you can query/allocate resources on
@@ -1559,6 +1582,26 @@ struct NvKmsKapiFunctionsTable {
        NvS32 index
    );

    /*!
     * Check or wait on a head's LUT notifier.
     *
     * \param [in] device             A device allocated using allocateDevice().
     *
     * \param [in] head               The head to check for LUT completion.
     *
     * \param [in] waitForCompletion  If true, wait for the notifier in NvKms
     *                                before returning.
     *
     * \param [out] complete          Returns whether the notifier has completed.
     */
    NvBool
    (*checkLutNotifier)
    (
        struct NvKmsKapiDevice *device,
        NvU32 head,
        NvBool waitForCompletion
    );
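
A hedged sketch of how a client might poll the new checkLutNotifier hook through the functions table. The nvKms table pointer and head index are placeholders, and treating the NvBool return as the completion status is an assumption (the comment above mentions a 'complete' out-parameter that the shown signature does not carry):

    /* Sketch: non-blocking check that head 0's LUT update has landed. */
    NvBool complete = nvKms->checkLutNotifier(device, 0 /* head */,
                                              NV_FALSE /* don't wait */);
    if (!complete) {
        /* LUT programming still in flight; retry later, or pass NV_TRUE
         * to block until the notifier signals. */
    }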

    /*
     * Notify NVKMS that the system's framebuffer console has been disabled and
     * the reserved allocation for the old framebuffer console can be unmapped.
@@ -701,11 +701,6 @@ nvPrevPow2_U64(const NvU64 x )
    }                                              \
}

//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
 * Returns the position of the nth set bit in the given mask.
 *
@@ -735,8 +730,6 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
    return -1;
}

#endif // NVDEC_1_0

//
// Size to use when declaring variable-sized arrays
//
@@ -780,12 +773,15 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
// Returns the offset (in bytes) of 'member' in struct 'type'.
#ifndef NV_OFFSETOF
    #if defined(__GNUC__) && (__GNUC__ > 3)
        #define NV_OFFSETOF(type, member) ((NvU32)__builtin_offsetof(type, member))
        #define NV_OFFSETOF(type, member) ((NvUPtr) __builtin_offsetof(type, member))
    #else
        #define NV_OFFSETOF(type, member) ((NvU32)(NvU64)&(((type *)0)->member)) // shouldn't we use PtrToUlong? But will need to include windows header.
        #define NV_OFFSETOF(type, member) ((NvUPtr) &(((type *)0)->member))
    #endif
#endif

// Given a pointer and the member it is of the parent struct, return a pointer to the parent struct
#define NV_CONTAINEROF(ptr, type, member) ((type *) (((NvUPtr) ptr) - NV_OFFSETOF(type, member)))
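
NV_CONTAINEROF recovers a pointer to an enclosing struct from a pointer to one of its members by subtracting the member's byte offset. A minimal sketch with hypothetical types:

    struct parent {
        int   header;
        float member;   /* suppose we only hold a pointer to this */
    };

    struct parent p;
    float *mp = &p.member;
    struct parent *pp = NV_CONTAINEROF(mp, struct parent, member);
    /* pp == &p: mp minus NV_OFFSETOF(struct parent, member). */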

//
// Performs a rounded division of b into a (unsigned). For the SIGNED version of
// the NV_ROUNDED_DIV() macro, check the comments in bug 769777.
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -159,7 +159,11 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE,           0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE,     0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_THRESHOLD_CROSSED,               0x00000085, "A fatal threshold has been crossed")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR,       0x00000086, "An error occurred while trying to retire a resource")
NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC,        0x00000087, "NVLink fabric state cached by the driver is out of sync")
NV_STATUS_CODE(NV_ERR_BUFFER_FULL,                     0x00000088, "Buffer is full")
NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY,                    0x00000089, "Buffer is empty")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH,               0x00010001, "WARNING Hot switch")
@@ -170,5 +174,6 @@ NV_STATUS_CODE(NV_WARN_MORE_PROCESSING_REQUIRED, 0x00010005, "WARNING More
NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO,            0x00010006, "WARNING Nothing to do")
NV_STATUS_CODE(NV_WARN_NULL_OBJECT,              0x00010007, "WARNING NULL object found")
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE,             0x00010008, "WARNING value out of range")
NV_STATUS_CODE(NV_WARN_THRESHOLD_CROSSED,        0x00010009, "WARNING Threshold has been crossed")

#endif /* SDK_NVSTATUSCODES_H */

@@ -229,6 +229,7 @@ extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_sme_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;

387
kernel-open/common/inc/os_dsi_panel_props.h
Normal file
@@ -0,0 +1,387 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_DSI_PANEL_PARAMS_H_
#define _OS_DSI_PANEL_PARAMS_H_

#define DSI_GENERIC_LONG_WRITE            0x29
#define DSI_DCS_LONG_WRITE                0x39
#define DSI_GENERIC_SHORT_WRITE_1_PARAMS  0x13
#define DSI_GENERIC_SHORT_WRITE_2_PARAMS  0x23
#define DSI_DCS_WRITE_0_PARAM             0x05
#define DSI_DCS_WRITE_1_PARAM             0x15
#define DSI_DCS_READ_PARAM                0x06
#define DSI_DCS_COMPRESSION_MODE          0x07
#define DSI_DCS_PPS_LONG_WRITE            0x0A

#define DSI_DCS_SET_ADDR_MODE             0x36
#define DSI_DCS_EXIT_SLEEP_MODE           0x11
#define DSI_DCS_ENTER_SLEEP_MODE          0x10
#define DSI_DCS_SET_DISPLAY_ON            0x29
#define DSI_DCS_SET_DISPLAY_OFF           0x28
#define DSI_DCS_SET_TEARING_EFFECT_OFF    0x34
#define DSI_DCS_SET_TEARING_EFFECT_ON     0x35
#define DSI_DCS_NO_OP                     0x0
#define DSI_NULL_PKT_NO_DATA              0x9
#define DSI_BLANKING_PKT_NO_DATA          0x19
#define DSI_DCS_SET_COMPRESSION_METHOD    0xC0

/* DCS commands for command mode */
#define DSI_ENTER_PARTIAL_MODE            0x12
#define DSI_SET_PIXEL_FORMAT              0x3A
#define DSI_AREA_COLOR_MODE               0x4C
#define DSI_SET_PARTIAL_AREA              0x30
#define DSI_SET_PAGE_ADDRESS              0x2B
#define DSI_SET_ADDRESS_MODE              0x36
#define DSI_SET_COLUMN_ADDRESS            0x2A
#define DSI_WRITE_MEMORY_START            0x2C
#define DSI_WRITE_MEMORY_CONTINUE         0x3C

#define PKT_ID0(id)   ((((id) & 0x3f) << 3) | \
                       (((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN0(len) (((len) & 0x7) << 0)
#define PKT_ID1(id)   ((((id) & 0x3f) << 13) | \
                       (((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN1(len) (((len) & 0x7) << 10)
#define PKT_ID2(id)   ((((id) & 0x3f) << 23) | \
                       (((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN2(len) (((len) & 0x7) << 20)
#define PKT_ID3(id)   ((((id) & 0x3f) << 3) | \
                       (((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN3(len) (((len) & 0x7) << 0)
#define PKT_ID4(id)   ((((id) & 0x3f) << 13) | \
                       (((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN4(len) (((len) & 0x7) << 10)
#define PKT_ID5(id)   ((((id) & 0x3f) << 23) | \
                       (((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN5(len) (((len) & 0x7) << 20)
#define PKT_LP        (((DSI_ENABLE) & 0x1) << 30)
#define NUMOF_PKT_SEQ 12
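
A worked instance of the packing above, under the assumption (from the enum later in this header) that DSI_ENABLE == 1: PKT_ID0(DSI_DCS_LONG_WRITE) = ((0x39 & 0x3f) << 3) | (1 << 9) = 0x1C8 | 0x200 = 0x3C8, and PKT_LEN0(3) = 3, so slot 0 of a packet-sequence word carrying a 3-word DCS long write is 0x3C8 | 0x3 = 0x3CB. Slots 1 and 2 pack the same fields at bit offsets 10 and 20 of the same 32-bit word.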

/* DSI pixel data format; enum values should match the dt-bindings in tegra-panel.h */
typedef enum
{
    DSI_PIXEL_FORMAT_16BIT_P,
    DSI_PIXEL_FORMAT_18BIT_P,
    DSI_PIXEL_FORMAT_18BIT_NP,
    DSI_PIXEL_FORMAT_24BIT_P,
    DSI_PIXEL_FORMAT_8BIT_DSC,
    DSI_PIXEL_FORMAT_12BIT_DSC,
    DSI_PIXEL_FORMAT_16BIT_DSC,
    DSI_PIXEL_FORMAT_10BIT_DSC,
    DSI_PIXEL_FORMAT_30BIT_P,
    DSI_PIXEL_FORMAT_36BIT_P,
} DSIPIXELFORMAT;

/* DSI virtual channel number */
typedef enum
{
    DSI_VIRTUAL_CHANNEL_0,
    DSI_VIRTUAL_CHANNEL_1,
    DSI_VIRTUAL_CHANNEL_2,
    DSI_VIRTUAL_CHANNEL_3,
} DSIVIRTUALCHANNEL;

/* DSI transmit method for video data */
typedef enum
{
    DSI_VIDEO_TYPE_VIDEO_MODE,
    DSI_VIDEO_TYPE_COMMAND_MODE,
} DSIVIDEODATAMODE;

/* DSI HS clock mode */
typedef enum
{
    DSI_VIDEO_CLOCK_CONTINUOUS,
    DSI_VIDEO_CLOCK_TX_ONLY,
} DSICLOCKMODE;

/* DSI burst mode setting in video mode. Each mode is assigned a
 * fixed value. The rationale behind this is to avoid changes to these
 * values, since the calculation of the dsi clock depends on them. */
typedef enum
{
    DSI_VIDEO_NON_BURST_MODE               = 0,
    DSI_VIDEO_NON_BURST_MODE_WITH_SYNC_END = 1,
    DSI_VIDEO_BURST_MODE_LOWEST_SPEED      = 2,
    DSI_VIDEO_BURST_MODE_LOW_SPEED         = 3,
    DSI_VIDEO_BURST_MODE_MEDIUM_SPEED      = 4,
    DSI_VIDEO_BURST_MODE_FAST_SPEED        = 5,
    DSI_VIDEO_BURST_MODE_FASTEST_SPEED     = 6,
} DSIVIDEOBURSTMODE;

/* DSI Ganged Mode */
typedef enum
{
    DSI_GANGED_SYMMETRIC_LEFT_RIGHT         = 1,
    DSI_GANGED_SYMMETRIC_EVEN_ODD           = 2,
    DSI_GANGED_SYMMETRIC_LEFT_RIGHT_OVERLAP = 3,
} DSIGANGEDTYPE;

typedef enum
{
    DSI_LINK0,
    DSI_LINK1,
} DSILINKNUM;

/* DSI Command Packet type */
typedef enum
{
    DSI_PACKET_CMD,
    DSI_DELAY_MS,
    DSI_GPIO_SET,
    DSI_SEND_FRAME,
    DSI_PACKET_VIDEO_VBLANK_CMD,
    DSI_DELAY_US,
} DSICMDPKTTYPE;

/* DSI Phy type */
typedef enum
{
    DSI_DPHY,
    DSI_CPHY,
} DSIPHYTYPE;

enum {
    DSI_GPIO_LCD_RESET,
    DSI_GPIO_PANEL_EN,
    DSI_GPIO_PANEL_EN_1,
    DSI_GPIO_BL_ENABLE,
    DSI_GPIO_BL_PWM,
    DSI_GPIO_AVDD_AVEE_EN,
    DSI_GPIO_VDD_1V8_LCD_EN,
    DSI_GPIO_TE,
    DSI_GPIO_BRIDGE_EN_0,
    DSI_GPIO_BRIDGE_EN_1,
    DSI_GPIO_BRIDGE_REFCLK_EN,
    DSI_N_GPIO_PANEL, /* add new gpio above this entry */
};

enum
{
    DSI_DISABLE,
    DSI_ENABLE,
};

typedef struct
{
    NvU8 cmd_type;
    NvU8 data_id;
    union
    {
        NvU16 data_len;
        NvU16 delay_ms;
        NvU16 delay_us;
        NvU32 gpio;
        NvU16 frame_cnt;
        struct
        {
            NvU8 data0;
            NvU8 data1;
        } sp;
    } sp_len_dly;
    NvU32 *pdata;
    NvU8 link_id;
    NvBool club_cmd;
} DSI_CMD, *PDSICMD;
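
A hedged sketch of how a panel table might populate these command records for an init sequence; the designated initializers are spelled out for clarity, and the exact field conventions (command in sp.data0 for short writes) are an assumption, since this header defines only the types:

    /* Sketch: exit sleep, wait 120 ms, then display on. */
    static DSI_CMD panel_init_cmds[] = {
        { .cmd_type = DSI_PACKET_CMD, .data_id = DSI_DCS_WRITE_0_PARAM,
          .sp_len_dly = { .sp = { .data0 = DSI_DCS_EXIT_SLEEP_MODE, .data1 = 0 } } },
        { .cmd_type = DSI_DELAY_MS,
          .sp_len_dly = { .delay_ms = 120 } },
        { .cmd_type = DSI_PACKET_CMD, .data_id = DSI_DCS_WRITE_0_PARAM,
          .sp_len_dly = { .sp = { .data0 = DSI_DCS_SET_DISPLAY_ON, .data1 = 0 } } },
    };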
typedef struct
{
    NvU16 t_hsdexit_ns;
    NvU16 t_hstrail_ns;
    NvU16 t_datzero_ns;
    NvU16 t_hsprepare_ns;
    NvU16 t_hsprebegin_ns;
    NvU16 t_hspost_ns;

    NvU16 t_clktrail_ns;
    NvU16 t_clkpost_ns;
    NvU16 t_clkzero_ns;
    NvU16 t_tlpx_ns;

    NvU16 t_clkprepare_ns;
    NvU16 t_clkpre_ns;
    NvU16 t_wakeup_ns;

    NvU16 t_taget_ns;
    NvU16 t_tasure_ns;
    NvU16 t_tago_ns;
} DSI_PHY_TIMING_IN_NS;

typedef struct
{
    NvU32 hActive;
    NvU32 vActive;
    NvU32 hFrontPorch;
    NvU32 vFrontPorch;
    NvU32 hBackPorch;
    NvU32 vBackPorch;
    NvU32 hSyncWidth;
    NvU32 vSyncWidth;
    NvU32 hPulsePolarity;
    NvU32 vPulsePolarity;
    NvU32 pixelClkRate;
} DSITIMINGS, *PDSITIMINGS;

typedef struct
{
    NvU8 n_data_lanes;    /* required */
    NvU8 pixel_format;    /* required */
    NvU8 refresh_rate;    /* required */
    NvU8 rated_refresh_rate;
    NvU8 panel_reset;     /* required */
    NvU8 virtual_channel; /* required */
    NvU8 dsi_instance;
    NvU16 dsi_panel_rst_gpio;
    NvU16 dsi_panel_bl_en_gpio;
    NvU16 dsi_panel_bl_pwm_gpio;
    NvU16 even_odd_split_width;
    NvU8 controller_vs;

    NvBool panel_has_frame_buffer; /* required */

    /* Deprecated. Use the DSI_SEND_FRAME panel command instead. */
    NvBool panel_send_dc_frames;

    DSI_CMD *dsi_init_cmd; /* required */
    NvU16 n_init_cmd;      /* required */
    NvU32 *dsi_init_cmd_array;
    NvU32 init_cmd_array_size;
    NvBool sendInitCmdsEarly;

    DSI_CMD *dsi_early_suspend_cmd;
    NvU16 n_early_suspend_cmd;
    NvU32 *dsi_early_suspend_cmd_array;
    NvU32 early_suspend_cmd_array_size;

    DSI_CMD *dsi_late_resume_cmd;
    NvU16 n_late_resume_cmd;
    NvU32 *dsi_late_resume_cmd_array;
    NvU32 late_resume_cmd_array_size;

    DSI_CMD *dsi_postvideo_cmd;
    NvU16 n_postvideo_cmd;
    NvU32 *dsi_postvideo_cmd_array;
    NvU32 postvideo_cmd_array_size;

    DSI_CMD *dsi_suspend_cmd; /* required */
    NvU16 n_suspend_cmd;      /* required */
    NvU32 *dsi_suspend_cmd_array;
    NvU32 suspend_cmd_array_size;

    NvU8 video_data_type; /* required */
    NvU8 video_clock_mode;
    NvU8 video_burst_mode;
    NvU8 ganged_type;
    NvU16 ganged_overlap;
    NvBool ganged_swap_links;
    NvBool ganged_write_to_all_links;
    NvU8 split_link_type;

    NvU8 suspend_aggr;

    NvU16 panel_buffer_size_byte;
    NvU16 panel_reset_timeout_msec;

    NvBool hs_cmd_mode_supported;
    NvBool hs_cmd_mode_on_blank_supported;
    NvBool enable_hs_clock_on_lp_cmd_mode;
    NvBool no_pkt_seq_eot; /* 1st-generation panels may not
                            * support eot. Don't set it for
                            * most panels. */
    const NvU32 *pktSeq;
    NvU32 *pktSeq_array;
    NvU32 pktSeq_array_size;
    NvBool skip_dsi_pkt_header;
    NvBool power_saving_suspend;
    NvBool suspend_stop_stream_late;
    NvBool dsi2lvds_bridge_enable;
    NvBool dsi2edp_bridge_enable;

    NvU32 max_panel_freq_khz;
    NvU32 lp_cmd_mode_freq_khz;
    NvU32 lp_read_cmd_mode_freq_khz;
    NvU32 hs_clk_in_lp_cmd_mode_freq_khz;
    NvU32 burst_mode_freq_khz;
    NvU32 fpga_freq_khz;

    NvU32 te_gpio;
    NvBool te_polarity_low;
    NvBool dsiEnVRR;
    NvBool dsiVrrPanelSupportsTe;
    NvBool dsiForceSetTePin;

    int panel_gpio[DSI_N_GPIO_PANEL];
    NvBool panel_gpio_populated;

    NvU32 dpd_dsi_pads;

    DSI_PHY_TIMING_IN_NS phyTimingNs;

    NvU8 *bl_name;

    NvBool lp00_pre_panel_wakeup;
    NvBool ulpm_not_supported;
    NvBool use_video_host_fifo_for_cmd;
    NvBool dsi_csi_loopback;
    NvBool set_max_timeout;
    NvBool use_legacy_dphy_core;
    // Swap P/N pin polarity of all data lanes
    NvBool swap_data_lane_polarity;
    // Swap P/N pin polarity of the clock lane
    NvBool swap_clock_lane_polarity;
    // Reverse clock polarity for partition A/B. The 1st SOT bit goes on the negedge of the clock lane
    NvBool reverse_clock_polarity;
    // DSI lane crossbar. The xbar array is allocated for the maximum number of lanes
    NvBool lane_xbar_exists;
    NvU32 lane_xbar_ctrl[8];
    NvU32 refresh_rate_adj;

    NvU8 dsiPhyType;
    NvBool en_data_scrambling;

    NvU32 dsipll_vco_rate_hz;
    NvU32 dsipll_clkoutpn_rate_hz;
    NvU32 dsipll_clkouta_rate_hz;
    NvU32 vpll0_rate_hz;

    DSITIMINGS dsiTimings;

    // DSC Parameters
    NvBool dsiDscEnable;
    NvU32 dsiDscBpp;
    NvU32 dsiDscNumSlices;
    NvU32 dsiDscSliceWidth;
    NvU32 dsiDscSliceHeight;
    NvBool dsiDscEnBlockPrediction;
    NvBool dsiDscEnDualDsc;
    NvU32 dsiDscDecoderMajorVersion;
    NvU32 dsiDscDecoderMinorVersion;
    NvBool dsiDscUseCustomPPS;
    NvU32 dsiDscCustomPPSData[32];

    // The driver allocates memory for the PPS cmd to be sent to the panel
    NvBool ppsCmdMemAllocated;
} DSI_PANEL_INFO;

#endif
32
kernel-open/common/inc/os_gpio.h
Normal file
@@ -0,0 +1,32 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_GPIO_H_
#define _OS_GPIO_H_

typedef enum
{
    NV_OS_GPIO_FUNC_HOTPLUG_A,
    NV_OS_GPIO_FUNC_HOTPLUG_B,
} NV_OS_GPIO_FUNC_NAMES;

#endif
@@ -83,7 +83,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvg
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);

@@ -662,27 +662,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
        ;;

        hash__remap_4k_pfn)
            #
            # Determine if the hash__remap_4k_pfn() function is
            # present.
            #
            # Added by commit 6cc1a0ee4ce2 ("powerpc/mm/radix: Add radix
            # callback for pmd accessors") in v4.7 (committed 2016-04-29).
            # Present only in arch/powerpc.
            #
            CODE="
            #if defined(NV_ASM_BOOK3S_64_HASH_64K_H_PRESENT)
            #include <linux/mm.h>
            #include <asm/book3s/64/hash-64k.h>
            #endif
            void conftest_hash__remap_4k_pfn(void) {
                hash__remap_4k_pfn();
            }"

            compile_check_conftest "$CODE" "NV_HASH__REMAP_4K_PFN_PRESENT" "" "functions"
        ;;

        register_cpu_notifier)
            #
            # Determine if register_cpu_notifier() is present
@@ -1633,7 +1612,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PHYS_TO_DMA_PRESENT" "" "functions"
        ;;

        dma_attr_macros)
            #
            # Determine if the NV_DMA_ATTR_SKIP_CPU_SYNC_PRESENT macro is present.
@@ -2441,6 +2419,45 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
        ;;

        drm_plane_create_color_properties)
            #
            # Determine if the function drm_plane_create_color_properties() is
            # present.
            #
            # Added by commit 80f690e9e3a6 ("drm: Add optional COLOR_ENCODING
            # and COLOR_RANGE properties to drm_plane") in v4.17 (2018-02-19).
            #
            CODE="
            #include <linux/types.h>
            #if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
            #include <drm/drm_color_mgmt.h>
            #endif
            void conftest_drm_plane_create_color_properties(void) {
                drm_plane_create_color_properties();
            }"

            compile_check_conftest "$CODE" "NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT" "" "functions"
        ;;

        drm_format_info_has_is_yuv)
            #
            # Determine if struct drm_format_info has the .is_yuv member.
            #
            # Added by commit ce2d54619a10 ("drm/fourcc: Add is_yuv field to
            # drm_format_info to denote if format is yuv") in v4.19
            # (2018-07-17).
            #
            CODE="
            #if defined(NV_DRM_DRM_FOURCC_H_PRESENT)
            #include <drm/drm_fourcc.h>
            #endif
            int conftest_drm_format_info_has_is_yuv(void) {
                return offsetof(struct drm_format_info, is_yuv);
            }"

            compile_check_conftest "$CODE" "NV_DRM_FORMAT_INFO_HAS_IS_YUV" "" "types"
        ;;

        pci_stop_and_remove_bus_device)
            #
            # Determine if the pci_stop_and_remove_bus_device() function is present.
@@ -3132,6 +3149,21 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
        ;;

        has_enum_pidtype_tgid)
            # Determine if PIDTYPE_TGID is present in the kernel as an enum
            #
            # Added by commit 6883f81aac6f ("pid: Implement PIDTYPE_TGID")
            # in v4.19
            #
            CODE="
            #include <linux/pid.h>

            enum pid_type type = PIDTYPE_TGID;
            "

            compile_check_conftest "$CODE" "NV_HAS_ENUM_PIDTYPE_TGID" "" "types"
        ;;

        vfio_pin_pages_has_vfio_device_arg)
            #
            # Determine if the vfio_pin_pages() kABI accepts "struct vfio_device *"
@@ -3519,60 +3551,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_VM_OPS_FAULT_REMOVED_VMA_ARG" "" "types"
        ;;

        pnv_npu2_init_context)
            #
            # Determine if the pnv_npu2_init_context() function is
            # present and the signature of its callback.
            #
            # Added by commit 1ab66d1fbada ("powerpc/powernv: Introduce
            # address translation services for Nvlink2") in v4.12
            # (2017-04-03).
            #
            echo "$CONFTEST_PREAMBLE
            #if defined(NV_ASM_POWERNV_H_PRESENT)
            #include <linux/pci.h>
            #include <asm/powernv.h>
            #endif
            void conftest_pnv_npu2_init_context(void) {
                pnv_npu2_init_context();
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c
            if [ -f conftest$$.o ]; then
                echo "#undef NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"
                echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
                rm -f conftest$$.o
                return
            fi

            echo "#define NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"

            # Check the callback signature
            echo "$CONFTEST_PREAMBLE
            #if defined(NV_ASM_POWERNV_H_PRESENT)
            #include <linux/pci.h>
            #include <asm/powernv.h>
            #endif

            struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                                                      unsigned long flags,
                                                      void (*cb)(struct npu_context *, void *),
                                                      void *priv) {
                return NULL;
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c
            if [ -f conftest$$.o ]; then
                echo "#define NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
                rm -f conftest$$.o
                return
            fi

            echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
        ;;

        of_get_ibm_chip_id)
            #
            # Determine if the of_get_ibm_chip_id() function is present.
@@ -4028,33 +4006,6 @@ compile_test() {
            fi
        ;;

        dma_buf_has_dynamic_attachment)
            #
            # Determine if the function dma_buf_attachment_is_dynamic()
            # is present.
            #
            # Added by commit 15fd552d186c
            # ("dma-buf: change DMA-buf locking convention v3") in v5.5 (2018-07-03)
            #
            echo "$CONFTEST_PREAMBLE
            #include <linux/dma-buf.h>
            bool conftest_dma_buf_attachment_is_dynamic(void) {
                return dma_buf_attachment_is_dynamic(NULL);
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c

            if [ -f conftest$$.o ]; then
                echo "#define NV_DMA_BUF_HAS_DYNAMIC_ATTACHMENT" | append_conftest "functions"
                rm -f conftest$$.o
                return
            else
                echo "#undef NV_DMA_BUF_HAS_DYNAMIC_ATTACHMENT" | append_conftest "functions"
                return
            fi
        ;;

        dma_buf_attachment_has_peer2peer)
            #
            # Determine if peer2peer is present in struct dma_buf_attachment.
@@ -5517,6 +5468,31 @@ compile_test() {
            fi
        ;;

        of_property_for_each_u32_has_internal_args)
            #
            # Determine if the internal arguments for the macro
            # of_property_for_each_u32() are present.
            #
            # Commit 9722c3b66e21 ("of: remove internal arguments from
            # of_property_for_each_u32()") removes two arguments from
            # of_property_for_each_u32() which are used internally within
            # the macro and so do not need to be passed. This change was
            # made for Linux v6.11.
            #
            CODE="
            #include <linux/of.h>
            void conftest_of_property_for_each_u32(struct device_node *np,
                                                   char *propname) {
                struct property *iparam1;
                const __be32 *iparam2;
                u32 val;

                of_property_for_each_u32(np, propname, iparam1, iparam2, val);
            }"

            compile_check_conftest "$CODE" "NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS" "" "types"
        ;;
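
Where such a conftest result gets consumed, the driver source typically branches on the generated macro. A hedged sketch of a caller handling both calling conventions of of_property_for_each_u32() (the property name and consumer function are placeholders):

    /* Sketch: iterate a u32 DT property under either kernel API. */
    u32 val;
    #if defined(NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS)
    struct property *prop;
    const __be32 *cur;
    of_property_for_each_u32(np, "nvidia,example-prop", prop, cur, val)
        use_value(val); /* hypothetical consumer */
    #else
    of_property_for_each_u32(np, "nvidia,example-prop", val)
        use_value(val);
    #endif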

        of_property_read_variable_u8_array)
            #
            # Determine if of_property_read_variable_u8_array is present
@@ -5613,8 +5589,8 @@ compile_test() {

        of_dma_configure)
            #
            # Determine if the of_dma_configure() function is present, and how
            # many arguments it takes.
            # Determine if the of_dma_configure() function is present, if it
            # returns int, and how many arguments it takes.
            #
            # Added by commit 591c1ee465ce ("of: configure the platform
            # device dma parameters") in v3.16. However, it was a static,
@@ -5624,6 +5600,10 @@ compile_test() {
            # commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
            # to help re-use") in v4.1.
            #
            # Its return type was changed from void to int by commit
            # 7b07cbefb68d ("iommu: of: Handle IOMMU lookup failure with
            # deferred probing or error") in v4.12.
            #
            # It subsequently began taking a third parameter with commit
            # 3d6ce86ee794 ("drivers: remove force dma flag from buses")
            # in v4.18.
@@ -5648,6 +5628,7 @@ compile_test() {

                echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
                echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
                echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
            else
                echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"

@@ -5666,6 +5647,26 @@ compile_test() {
                if [ -f conftest$$.o ]; then
                    rm -f conftest$$.o
                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"

                    echo "$CONFTEST_PREAMBLE
                    #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
                    #include <linux/of_device.h>
                    #endif

                    int conftest_of_dma_configure_has_int_return_type(void) {
                        return of_dma_configure(NULL, NULL, false);
                    }" > conftest$$.c

                    $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
                    rm -f conftest$$.c

                    if [ -f conftest$$.o ]; then
                        rm -f conftest$$.o
                        echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    else
                        echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    fi

                    return
                fi

@@ -5684,6 +5685,26 @@ compile_test() {
                if [ -f conftest$$.o ]; then
                    rm -f conftest$$.o
                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"

                    echo "$CONFTEST_PREAMBLE
                    #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
                    #include <linux/of_device.h>
                    #endif

                    int conftest_of_dma_configure_has_int_return_type(void) {
                        return of_dma_configure(NULL, NULL);
                    }" > conftest$$.c

                    $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
                    rm -f conftest$$.c

                    if [ -f conftest$$.o ]; then
                        rm -f conftest$$.o
                        echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    else
                        echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    fi

                    return
                fi
            fi

@@ -7546,6 +7567,22 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
        ;;

        page_pgmap)
            #
            # Determine if the page_pgmap() function is present.
            #
            # Added by commit 82ba975e4c43 ("mm: allow compound zone device
            # pages") in v6.14
            #
            CODE="
            #include <linux/mmzone.h>
            int conftest_page_pgmap(void) {
                return page_pgmap();
            }"

            compile_check_conftest "$CODE" "NV_PAGE_PGMAP_PRESENT" "" "functions"
        ;;

        folio_test_swapcache)
            #
            # Determine if the folio_test_swapcache() function is present.
@@ -7562,6 +7599,34 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
        ;;

        platform_driver_struct_remove_returns_void)
            #
            # Determine if the 'platform_driver' structure's 'remove' function
            # pointer returns void.
            #
            # Commit 0edb555a65d1 ("platform: Make platform_driver::remove()
            # return void") updated the platform_driver structure's 'remove'
            # callback to return void instead of int in Linux v6.11-rc1.
            #
            echo "$CONFTEST_PREAMBLE
            #include <linux/platform_device.h>
            int conftest_platform_driver_struct_remove_returns_void(struct platform_device *pdev,
                                                                    struct platform_driver *driver) {
                return driver->remove(pdev);
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c

            if [ -f conftest$$.o ]; then
                rm -f conftest$$.o

                echo "#undef NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
            else
                echo "#define NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
            fi
        ;;
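
A hedged sketch of driver code keyed off the macro defined above, supporting both remove() signatures with one body (the teardown helper is a placeholder):

    /* Sketch: platform_driver remove callback under either kABI. */
    #if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)
    static void nv_example_remove(struct platform_device *pdev)
    {
        nv_example_teardown(pdev); /* hypothetical */
    }
    #else
    static int nv_example_remove(struct platform_device *pdev)
    {
        nv_example_teardown(pdev); /* hypothetical */
        return 0;
    }
    #endif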

        module_import_ns_takes_constant)
            #
            # Determine if the MODULE_IMPORT_NS macro takes a string literal
@@ -7579,6 +7644,62 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
        ;;

        assign_str)
            #
            # Determine whether the __assign_str() macro, used in tracepoint
            # event definitions, has the 'src' parameter.
            #
            # The 'src' parameter was removed by commit 2c92ca849fcc
            # ("tracing/treewide: Remove second parameter of __assign_str()") in
            # v6.10.
            #
            # The expected usage of __assign_str() inside the TRACE_EVENT()
            # macro, which involves multiple include passes and assumes it is
            # in a header file, requires a non-standard conftest approach of
            # producing both a header and a C file.
            #
            echo "$CONFTEST_PREAMBLE
            #undef TRACE_SYSTEM
            #define TRACE_SYSTEM conftest

            #if !defined(_TRACE_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
            #define _TRACE_CONFTEST_H
            #include <linux/tracepoint.h>
            TRACE_EVENT(conftest,
                        TP_PROTO(const char *s),
                        TP_ARGS(s),
                        TP_STRUCT__entry(__string(s, s)),
                        TP_fast_assign(__assign_str(s);),
                        TP_printk(\"%s\", __get_str(s))
            );
            #endif

            #undef TRACE_INCLUDE_PATH
            #define TRACE_INCLUDE_PATH .
            #define TRACE_INCLUDE_FILE conftest$$
            #include <trace/define_trace.h>
            " > conftest$$.h

            echo "$CONFTEST_PREAMBLE
            #define CREATE_TRACE_POINTS
            #include \"conftest$$.h\"

            void conftest_assign_str(void) {
                trace_conftest(\"conftest\");
            }
            " > conftest$$.c

            $CC $CFLAGS -c conftest$$.c >/dev/null 2>&1
            rm -f conftest$$.c conftest$$.h

            if [ -f conftest$$.o ]; then
                rm -f conftest$$.o

                echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 1" | append_conftest "functions"
            else
                echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 2" | append_conftest "functions"
            fi
        ;;

        drm_driver_has_date)
            #
@@ -7604,6 +7725,33 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
        ;;

        drm_connector_helper_funcs_mode_valid_has_const_mode_arg)
            #
            # Determine if the 'mode' pointer argument is const in
            # drm_connector_helper_funcs::mode_valid.
            #
            # The 'mode' pointer argument in
            # drm_connector_helper_funcs::mode_valid was made const by commit
            # 26d6fd81916e ("drm/connector: make mode_valid take a const struct
            # drm_display_mode") in linux-next, expected in v6.15.
            #
            CODE="
            #if defined(NV_DRM_DRM_ATOMIC_HELPER_H_PRESENT)
            #include <drm/drm_atomic_helper.h>
            #endif

            static int conftest_drm_connector_mode_valid(struct drm_connector *connector,
                                                         const struct drm_display_mode *mode) {
                return 0;
            }

            const struct drm_connector_helper_funcs conftest_drm_connector_helper_funcs = {
                .mode_valid = conftest_drm_connector_mode_valid,
            };"

            compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
        ;;

        # When adding a new conftest entry, please use the correct format for
        # specifying the relevant upstream Linux kernel commit. Please
        # avoid specifying -rc kernels, and only use SHAs that actually exist
@@ -14,6 +14,7 @@ NV_HEADER_PRESENCE_TESTS = \
  drm/drm_encoder.h \
  drm/drm_atomic_uapi.h \
  drm/drm_drv.h \
  drm/drm_edid.h \
  drm/drm_fbdev_generic.h \
  drm/drm_fbdev_ttm.h \
  drm/drm_client_setup.h \
@@ -65,13 +66,10 @@ NV_HEADER_PRESENCE_TESTS = \
  linux/nvhost.h \
  linux/nvhost_t194.h \
  linux/host1x-next.h \
  asm/book3s/64/hash-64k.h \
  asm/set_memory.h \
  asm/prom.h \
  asm/powernv.h \
  linux/atomic.h \
  asm/barrier.h \
  asm/opal-api.h \
  sound/hdaudio.h \
  asm/pgtable_types.h \
  asm/page.h \

@@ -62,6 +62,20 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && \
    defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && \
    defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif
#endif

#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
    (defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
     defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))

@@ -314,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
}

static int nv_drm_connector_mode_valid(struct drm_connector *connector,
#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
                                        const struct drm_display_mode *mode)
#else
                                        struct drm_display_mode *mode)
#endif
{
    struct drm_device *dev = connector->dev;
    struct nv_drm_device *nv_dev = to_nv_device(dev);

@@ -372,23 +372,88 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
old_config.dstY != req_config->dstY;
|
||||
}
|
||||
|
||||
static void free_drm_lut_surface(struct kref *ref)
|
||||
static void release_drm_nvkms_surface(struct nv_drm_nvkms_surface *drm_nvkms_surface)
|
||||
{
|
||||
struct nv_drm_lut_surface *drm_lut_surface =
|
||||
container_of(ref, struct nv_drm_lut_surface, refcount);
|
||||
struct NvKmsKapiDevice *pDevice = drm_lut_surface->pDevice;
|
||||
struct NvKmsKapiDevice *pDevice = drm_nvkms_surface->pDevice;
|
||||
|
||||
BUG_ON(drm_lut_surface->nvkms_surface == NULL);
|
||||
BUG_ON(drm_lut_surface->nvkms_memory == NULL);
|
||||
BUG_ON(drm_lut_surface->buffer == NULL);
|
||||
BUG_ON(drm_nvkms_surface->nvkms_surface == NULL);
|
||||
BUG_ON(drm_nvkms_surface->nvkms_memory == NULL);
|
||||
BUG_ON(drm_nvkms_surface->buffer == NULL);
|
||||
|
||||
nvKms->destroySurface(pDevice, drm_lut_surface->nvkms_surface);
|
||||
nvKms->unmapMemory(pDevice, drm_lut_surface->nvkms_memory,
|
||||
nvKms->destroySurface(pDevice, drm_nvkms_surface->nvkms_surface);
|
||||
nvKms->unmapMemory(pDevice, drm_nvkms_surface->nvkms_memory,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
drm_lut_surface->buffer);
|
||||
nvKms->freeMemory(pDevice, drm_lut_surface->nvkms_memory);
|
||||
drm_nvkms_surface->buffer);
|
||||
nvKms->freeMemory(pDevice, drm_nvkms_surface->nvkms_memory);
|
||||
}
|
||||
|
||||
nv_drm_free(drm_lut_surface);
|
||||
static int init_drm_nvkms_surface(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_nvkms_surface *drm_nvkms_surface,
|
||||
struct nv_drm_nvkms_surface_params *surface_params)
|
||||
{
|
||||
struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
|
||||
NvU8 compressible = 0; // No compression
|
||||
|
||||
struct NvKmsKapiCreateSurfaceParams params = {};
|
||||
struct NvKmsKapiMemory *surface_mem;
|
||||
struct NvKmsKapiSurface *surface;
|
||||
void *buffer;
|
||||
|
||||
params.format = surface_params->format;
|
||||
params.width = surface_params->width;
|
||||
params.height = surface_params->height;
|
||||
|
||||
/* Allocate displayable memory. */
|
||||
if (nv_dev->hasVideoMemory) {
|
||||
surface_mem =
|
||||
nvKms->allocateVideoMemory(pDevice,
|
||||
NvKmsSurfaceMemoryLayoutPitch,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
|
||||
surface_params->surface_size,
|
||||
&compressible);
|
||||
} else {
|
||||
surface_mem =
|
||||
nvKms->allocateSystemMemory(pDevice,
|
||||
NvKmsSurfaceMemoryLayoutPitch,
|
||||
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
|
||||
surface_params->surface_size,
|
||||
&compressible);
|
||||
}
|
||||
if (surface_mem == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Map memory in order to populate it. */
|
||||
if (!nvKms->mapMemory(pDevice, surface_mem,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
&buffer)) {
|
||||
nvKms->freeMemory(pDevice, surface_mem);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
params.planes[0].memory = surface_mem;
|
||||
params.planes[0].offset = 0;
|
||||
params.planes[0].pitch = surface_params->surface_size;
|
||||
|
||||
/* Create surface. */
|
||||
surface = nvKms->createSurface(pDevice, ¶ms);
|
||||
if (surface == NULL) {
|
||||
nvKms->unmapMemory(pDevice, surface_mem,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL, buffer);
|
||||
nvKms->freeMemory(pDevice, surface_mem);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Pack into struct nv_drm_nvkms_surface. */
|
||||
drm_nvkms_surface->pDevice = pDevice;
|
||||
drm_nvkms_surface->nvkms_memory = surface_mem;
|
||||
drm_nvkms_surface->nvkms_surface = surface;
|
||||
drm_nvkms_surface->buffer = buffer;
|
||||
|
||||
/* Init refcount. */
|
||||
kref_init(&drm_nvkms_surface->refcount);
|
||||
|
||||
return 0;
|
||||
}

static struct nv_drm_lut_surface *alloc_drm_lut_surface(
@@ -399,86 +464,49 @@ static struct nv_drm_lut_surface *alloc_drm_lut_surface(
    NvU32 num_vss_header_entries,
    NvU32 num_entries)
{
    struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
    struct nv_drm_lut_surface *drm_lut_surface;
-   NvU8 compressible = 0; // No compression
-   size_t size =
+   const size_t surface_size =
        (((num_vss_header_entries + num_entries) *
          NVKMS_LUT_CAPS_LUT_ENTRY_SIZE) + 255) & ~255; // 256-byte aligned

-   struct NvKmsKapiMemory *surface_mem;
-   struct NvKmsKapiSurface *surface;
-   struct NvKmsKapiCreateSurfaceParams params = {};
-   NvU16 *lut_data;
+   struct nv_drm_nvkms_surface_params params = {};

-   /* Allocate displayable memory. */
-   if (nv_dev->hasVideoMemory) {
-       surface_mem =
-           nvKms->allocateVideoMemory(pDevice,
-                                      NvKmsSurfaceMemoryLayoutPitch,
-                                      NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
-                                      size,
-                                      &compressible);
-   } else {
-       surface_mem =
-           nvKms->allocateSystemMemory(pDevice,
-                                       NvKmsSurfaceMemoryLayoutPitch,
-                                       NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
-                                       size,
-                                       &compressible);
-   }
-   if (surface_mem == NULL) {
-       return NULL;
-   }
-
-   /* Map memory in order to populate it. */
-   if (!nvKms->mapMemory(pDevice, surface_mem,
-                         NVKMS_KAPI_MAPPING_TYPE_KERNEL,
-                         (void **) &lut_data)) {
-       nvKms->freeMemory(pDevice, surface_mem);
-       return NULL;
-   }
-
-   /* Create surface. */
    params.format = NvKmsSurfaceMemoryFormatR16G16B16A16;
    params.width = num_vss_header_entries + num_entries;
    params.height = 1;
-   params.planes[0].memory = surface_mem;
-   params.planes[0].offset = 0;
-   params.planes[0].pitch = size;
+   params.surface_size = surface_size;

-   surface = nvKms->createSurface(pDevice, &params);
-   if (surface == NULL) {
-       nvKms->unmapMemory(pDevice, surface_mem,
-                          NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
-       nvKms->freeMemory(pDevice, surface_mem);
-       return NULL;
-   }
-
    /* Pack into struct nv_drm_lut_surface. */
    drm_lut_surface = nv_drm_calloc(1, sizeof(struct nv_drm_lut_surface));
    if (drm_lut_surface == NULL) {
-       nvKms->destroySurface(pDevice, surface);
-       nvKms->unmapMemory(pDevice, surface_mem,
-                          NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
-       nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }
-   drm_lut_surface->pDevice = pDevice;
-   drm_lut_surface->nvkms_memory = surface_mem;
-   drm_lut_surface->nvkms_surface = surface;
-   drm_lut_surface->buffer = lut_data;

+   if (init_drm_nvkms_surface(nv_dev, &drm_lut_surface->base, &params) != 0) {
+       nv_drm_free(drm_lut_surface);
+       return NULL;
+   }

    drm_lut_surface->properties.vssSegments = num_vss_header_segments;
    drm_lut_surface->properties.vssType = vss_type;
    drm_lut_surface->properties.lutEntries = num_entries;
    drm_lut_surface->properties.entryFormat = entry_format;

-   /* Init refcount. */
-   kref_init(&drm_lut_surface->refcount);
-
    return drm_lut_surface;
}
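
The ((x) + 255) & ~255 idiom above rounds a byte count up to the next multiple of 256, which works because 256 is a power of two. A small standalone illustration, using a hypothetical entry count and per-entry size (the real per-entry size is NVKMS_LUT_CAPS_LUT_ENTRY_SIZE):

#include <stdio.h>

/* Round x up to the next multiple of 256. */
#define ALIGN_256(x) (((x) + 255) & ~255UL)

int main(void)
{
    /* Hypothetical values: 1029 LUT entries of 8 bytes each. */
    unsigned long raw = 1029UL * 8UL;              /* 8232 bytes */

    printf("%lu -> %lu\n", raw, ALIGN_256(raw));   /* 8232 -> 8448 */
    return 0;
}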

static void free_drm_lut_surface(struct kref *ref)
{
    struct nv_drm_nvkms_surface *drm_nvkms_surface =
        container_of(ref, struct nv_drm_nvkms_surface, refcount);
    struct nv_drm_lut_surface *drm_lut_surface =
        container_of(drm_nvkms_surface, struct nv_drm_lut_surface, base);

    // Clean up base
    release_drm_nvkms_surface(drm_nvkms_surface);

    nv_drm_free(drm_lut_surface);
}
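
free_drm_lut_surface() is a kref release callback: it recovers the base pointer with container_of() and then frees the containing nv_drm_lut_surface. Callers never invoke it directly; they manipulate the count through the embedded base, as the call sites later in this change do:

/* Take a reference, e.g. while duplicating plane state. */
kref_get(&drm_lut_surface->base.refcount);

/* Drop it again; free_drm_lut_surface() runs when the count hits zero. */
kref_put(&drm_lut_surface->base.refcount, free_drm_lut_surface);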

static NvU32 fp32_lut_interp(
    NvU16 entry0,
    NvU16 entry1,
@@ -582,7 +610,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_vss(
        return NULL;
    }

-   lut_data = (NvU16 *) drm_lut_surface->buffer;
+   lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate VSS header. */
    if (vss_header_seg_sizes != NULL) {
@@ -733,7 +761,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_legacy(
        return NULL;
    }

-   lut_data = (NvU16 *) drm_lut_surface->buffer;
+   lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Fill LUT surface. */
    for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -799,7 +827,7 @@ static struct nv_drm_lut_surface *create_drm_tmo_surface(
        return NULL;
    }

-   lut_data = (NvU16 *) drm_lut_surface->buffer;
+   lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate linear VSS header. */
    for (entry_idx = 0; entry_idx < NUM_VSS_HEADER_ENTRIES; entry_idx++) {
@@ -901,7 +929,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_vss(
        return NULL;
    }

-   lut_data = (NvU16 *) drm_lut_surface->buffer;
+   lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate VSS header. */
    if (vss_header_seg_sizes != NULL) {
@@ -1021,7 +1049,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_legacy(
        return NULL;
    }

-   lut_data = (NvU16 *) drm_lut_surface->buffer;
+   lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Fill LUT surface. */
    for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -1057,6 +1085,74 @@ update_matrix_override(struct drm_property_blob *blob,
    return enabled;
}

static enum NvKmsInputColorSpace nv_get_nvkms_input_colorspace(
    enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
        case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return NVKMS_INPUT_COLOR_SPACE_NONE;
        case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return NVKMS_INPUT_COLOR_SPACE_BT709;
        case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return NVKMS_INPUT_COLOR_SPACE_BT2100;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return NVKMS_INPUT_COLOR_SPACE_NONE;
    }
}

static enum NvKmsInputTf nv_get_nvkms_input_tf(
    enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
        case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return NVKMS_INPUT_TF_LINEAR;
        case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return NVKMS_INPUT_TF_LINEAR;
        case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return NVKMS_INPUT_TF_PQ;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return NVKMS_INPUT_TF_LINEAR;
    }
}

#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
static enum NvKmsInputColorSpace nv_drm_color_encoding_to_nvkms_colorspace(
    enum drm_color_encoding color_encoding)
{
    switch(color_encoding) {
        case DRM_COLOR_YCBCR_BT601:
            return NVKMS_INPUT_COLOR_SPACE_BT601;
        case DRM_COLOR_YCBCR_BT709:
            return NVKMS_INPUT_COLOR_SPACE_BT709;
        case DRM_COLOR_YCBCR_BT2020:
            return NVKMS_INPUT_COLOR_SPACE_BT2020;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported DRM color_encoding");
            return NVKMS_INPUT_COLOR_SPACE_NONE;
    }
}

static enum NvKmsInputColorRange nv_drm_color_range_to_nvkms_color_range(
    enum drm_color_range color_range)
{
    switch(color_range) {
        case DRM_COLOR_YCBCR_FULL_RANGE:
            return NVKMS_INPUT_COLOR_RANGE_FULL;
        case DRM_COLOR_YCBCR_LIMITED_RANGE:
            return NVKMS_INPUT_COLOR_RANGE_LIMITED;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported DRM color_range");
            return NVKMS_INPUT_COLOR_RANGE_DEFAULT;
    }
}
#endif
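
For context, the drm_color_encoding and drm_color_range values consumed above arrive from userspace through the standard COLOR_ENCODING and COLOR_RANGE plane properties. A hedged libdrm sketch of looking such a property up by name (find_plane_prop is an illustrative helper, not part of this driver):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Find a plane property ID by name; returns 0 if the plane lacks it. */
static uint32_t find_plane_prop(int fd, uint32_t plane_id, const char *name)
{
    drmModeObjectProperties *props =
        drmModeObjectGetProperties(fd, plane_id, DRM_MODE_OBJECT_PLANE);
    uint32_t prop_id = 0;

    if (props == NULL) {
        return 0;
    }
    for (uint32_t i = 0; i < props->count_props; i++) {
        drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);
        if (prop != NULL) {
            if (strcmp(prop->name, name) == 0) {
                prop_id = prop->prop_id;
            }
            drmModeFreeProperty(prop);
        }
    }
    drmModeFreeObjectProperties(props);
    return prop_id;
}

The returned ID would then be passed to drmModeObjectSetProperty() or an atomic request, e.g. for "COLOR_ENCODING" or "COLOR_RANGE".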

static int
plane_req_config_update(struct drm_plane *plane,
                        struct drm_plane_state *plane_state,
@@ -1190,8 +1286,37 @@ plane_req_config_update(struct drm_plane *plane,
            nv_plane->defaultCompositionMode;
#endif

-   req_config->config.inputColorSpace =
-       nv_drm_plane_state->input_colorspace;
+#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
+   if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
+       nv_drm_format_is_yuv(plane_state->fb->format->format)) {
+
+       if (nv_plane->supportsColorProperties) {
+           req_config->config.inputColorSpace =
+               nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
+           req_config->config.inputColorRange =
+               nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
+       } else {
+           req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
+           req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
+       }
+       req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
+   } else {
+#endif
+       req_config->config.inputColorSpace =
+           nv_get_nvkms_input_colorspace(nv_drm_plane_state->input_colorspace);
+       req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
+       req_config->config.inputTf =
+           nv_get_nvkms_input_tf(nv_drm_plane_state->input_colorspace);
+#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
+   }
+#endif
+
+   req_config->flags.inputTfChanged =
+       (old_config.inputTf != req_config->config.inputTf);
+   req_config->flags.inputColorSpaceChanged =
+       (old_config.inputColorSpace != req_config->config.inputColorSpace);
+   req_config->flags.inputColorRangeChanged =
+       (old_config.inputColorRange != req_config->config.inputColorRange);

    req_config->config.syncParams.preSyncptSpecified = false;
    req_config->config.syncParams.postSyncptRequested = false;
@@ -1240,10 +1365,10 @@ plane_req_config_update(struct drm_plane *plane,

    switch (info_frame->eotf) {
        case HDMI_EOTF_SMPTE_ST2084:
-           req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
+           req_config->config.outputTf = NVKMS_OUTPUT_TF_PQ;
            break;
        case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
-           req_config->config.tf =
+           req_config->config.outputTf =
                NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
            break;
        default:
@@ -1254,7 +1379,7 @@ plane_req_config_update(struct drm_plane *plane,
        req_config->config.hdrMetadata.enabled = true;
    } else {
        req_config->config.hdrMetadata.enabled = false;
-       req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
+       req_config->config.outputTf = NVKMS_OUTPUT_TF_NONE;
    }

    req_config->flags.hdrMetadataChanged =
@@ -1264,7 +1389,7 @@ plane_req_config_update(struct drm_plane *plane,
                &req_config->config.hdrMetadata.val,
                sizeof(struct NvKmsHDRStaticMetadata)));

-   req_config->flags.tfChanged = (old_config.tf != req_config->config.tf);
+   req_config->flags.outputTfChanged = (old_config.outputTf != req_config->config.outputTf);
#endif

    req_config->config.matrixOverrides.enabled.lmsCtm =
@@ -1295,7 +1420,7 @@ plane_req_config_update(struct drm_plane *plane,

    if (nv_drm_plane_state->degamma_changed) {
        if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
-           kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
+           kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_drm_plane_state->degamma_drm_lut_surface = NULL;
        }
@@ -1327,7 +1452,7 @@ plane_req_config_update(struct drm_plane *plane,
    if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
        req_config->config.ilut.enabled = NV_TRUE;
        req_config->config.ilut.lutSurface =
-           nv_drm_plane_state->degamma_drm_lut_surface->nvkms_surface;
+           nv_drm_plane_state->degamma_drm_lut_surface->base.nvkms_surface;
        req_config->config.ilut.offset = 0;
        req_config->config.ilut.vssSegments =
            nv_drm_plane_state->degamma_drm_lut_surface->properties.vssSegments;
@@ -1346,7 +1471,7 @@ plane_req_config_update(struct drm_plane *plane,

    if (nv_drm_plane_state->tmo_changed) {
        if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
-           kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
+           kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_drm_plane_state->tmo_drm_lut_surface = NULL;
        }
@@ -1363,7 +1488,7 @@ plane_req_config_update(struct drm_plane *plane,
    if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
        req_config->config.tmo.enabled = NV_TRUE;
        req_config->config.tmo.lutSurface =
-           nv_drm_plane_state->tmo_drm_lut_surface->nvkms_surface;
+           nv_drm_plane_state->tmo_drm_lut_surface->base.nvkms_surface;
        req_config->config.tmo.offset = 0;
        req_config->config.tmo.vssSegments =
            nv_drm_plane_state->tmo_drm_lut_surface->properties.vssSegments;
@@ -1870,7 +1995,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
    nv_plane_state->degamma_drm_lut_surface =
        nv_old_plane_state->degamma_drm_lut_surface;
    if (nv_plane_state->degamma_drm_lut_surface) {
-       kref_get(&nv_plane_state->degamma_drm_lut_surface->refcount);
+       kref_get(&nv_plane_state->degamma_drm_lut_surface->base.refcount);
    }

    nv_plane_state->tmo_lut = nv_old_plane_state->tmo_lut;
@@ -1881,7 +2006,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
    nv_plane_state->tmo_drm_lut_surface =
        nv_old_plane_state->tmo_drm_lut_surface;
    if (nv_plane_state->tmo_drm_lut_surface) {
-       kref_get(&nv_plane_state->tmo_drm_lut_surface->refcount);
+       kref_get(&nv_plane_state->tmo_drm_lut_surface->base.refcount);
    }

    return &nv_plane_state->base;
@@ -1909,13 +2034,13 @@ static inline void __nv_drm_plane_atomic_destroy_state(

    nv_drm_property_blob_put(nv_drm_plane_state->degamma_lut);
    if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
-       kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
+       kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
                 free_drm_lut_surface);
    }

    nv_drm_property_blob_put(nv_drm_plane_state->tmo_lut);
    if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
-       kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
+       kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
                 free_drm_lut_surface);
    }
}
@@ -2113,7 +2238,7 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
    }
    nv_state->regamma_divisor = nv_old_state->regamma_divisor;
    if (nv_state->regamma_drm_lut_surface) {
-       kref_get(&nv_state->regamma_drm_lut_surface->refcount);
+       kref_get(&nv_state->regamma_drm_lut_surface->base.refcount);
    }
    nv_state->regamma_changed = false;

@@ -2142,7 +2267,7 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,

    nv_drm_property_blob_put(nv_state->regamma_lut);
    if (nv_state->regamma_drm_lut_surface != NULL) {
-       kref_put(&nv_state->regamma_drm_lut_surface->refcount,
+       kref_put(&nv_state->regamma_drm_lut_surface->base.refcount,
                 free_drm_lut_surface);
    }

@@ -2386,7 +2511,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,

    if (nv_crtc_state->regamma_changed) {
        if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
-           kref_put(&nv_crtc_state->regamma_drm_lut_surface->refcount,
+           kref_put(&nv_crtc_state->regamma_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_crtc_state->regamma_drm_lut_surface = NULL;
        }
@@ -2417,7 +2542,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
    if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
        req_config->modeSetConfig.olut.enabled = NV_TRUE;
        req_config->modeSetConfig.olut.lutSurface =
-           nv_crtc_state->regamma_drm_lut_surface->nvkms_surface;
+           nv_crtc_state->regamma_drm_lut_surface->base.nvkms_surface;
        req_config->modeSetConfig.olut.offset = 0;
        req_config->modeSetConfig.olut.vssSegments =
            nv_crtc_state->regamma_drm_lut_surface->properties.vssSegments;
@@ -2521,7 +2646,7 @@ static void nv_drm_plane_install_properties(
    if (nv_dev->nv_input_colorspace_property) {
        drm_object_attach_property(
            &plane->base, nv_dev->nv_input_colorspace_property,
-           NVKMS_INPUT_COLORSPACE_NONE);
+           NV_DRM_INPUT_COLOR_SPACE_NONE);
    }

    if (supportsICtCp) {
@@ -2531,17 +2656,14 @@ static void nv_drm_plane_install_properties(
            &plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
    }
#endif
}

-   /*
-    * The old DRM_OBJECT_MAX_PROPERTY limit of 24 is too small to
-    * accommodate all of the properties for the ICtCp pipeline.
-    *
-    * Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
-    * accommodate new color props") in Linux v6.8 increased the limit to
-    * 64. To be safe, require this before attaching any properties for the
-    * ICtCp pipeline.
-    */
-   if (DRM_OBJECT_MAX_PROPERTY >= 64) {
+   /*
+    * Per-plane HDR properties get us dangerously close to the 24 property
+    * limit on kernels that don't support NV_DRM_USE_EXTENDED_PROPERTIES.
+    */
+   if (NV_DRM_USE_EXTENDED_PROPERTIES) {
        if (supportsICtCp) {
            if (nv_dev->nv_plane_lms_ctm_property) {
                drm_object_attach_property(
                    &plane->base, nv_dev->nv_plane_lms_ctm_property, 0);
@@ -2568,36 +2690,36 @@ static void nv_drm_plane_install_properties(
                    NVKMS_LUT_ARRAY_SIZE);
            }
        }
    }

-   if (nv_dev->nv_plane_blend_ctm_property) {
-       drm_object_attach_property(
-           &plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
-   }
-
-   if (nv_plane->ilut_caps.supported) {
-       if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
-           if (nv_dev->nv_plane_degamma_tf_property) {
-               drm_object_attach_property(
-                   &plane->base, nv_dev->nv_plane_degamma_tf_property,
-                   NV_DRM_TRANSFER_FUNCTION_DEFAULT);
-           }
-           if (nv_dev->nv_plane_degamma_multiplier_property) {
-               /* Default to 1 in S31.32 Sign-Magnitude Format */
-               nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
-               drm_object_attach_property(
-                   &plane->base, nv_dev->nv_plane_degamma_multiplier_property,
-                   nv_plane_state->degamma_multiplier);
-           }
-       }
-       if (nv_dev->nv_plane_degamma_lut_property) {
-           drm_object_attach_property(
-               &plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
-       }
-       if (nv_dev->nv_plane_degamma_lut_size_property) {
-           drm_object_attach_property(
-               &plane->base, nv_dev->nv_plane_degamma_lut_size_property,
-               NVKMS_LUT_ARRAY_SIZE);
-       }
-   }
+       if (nv_dev->nv_plane_blend_ctm_property) {
+           drm_object_attach_property(
+               &plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
+       }
+
+       if (nv_plane->ilut_caps.supported) {
+           if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
+               if (nv_dev->nv_plane_degamma_tf_property) {
+                   drm_object_attach_property(
+                       &plane->base, nv_dev->nv_plane_degamma_tf_property,
+                       NV_DRM_TRANSFER_FUNCTION_DEFAULT);
+               }
+               if (nv_dev->nv_plane_degamma_multiplier_property) {
+                   /* Default to 1 in S31.32 Sign-Magnitude Format */
+                   nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
+                   drm_object_attach_property(
+                       &plane->base, nv_dev->nv_plane_degamma_multiplier_property,
+                       nv_plane_state->degamma_multiplier);
+               }
+           }
+           if (nv_dev->nv_plane_degamma_lut_property) {
+               drm_object_attach_property(
+                   &plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
+           }
+           if (nv_dev->nv_plane_degamma_lut_size_property) {
+               drm_object_attach_property(
+                   &plane->base, nv_dev->nv_plane_degamma_lut_size_property,
+                   NVKMS_LUT_ARRAY_SIZE);
+           }
+       }
+   }
}
@@ -2776,6 +2898,29 @@ nv_drm_plane_create(struct drm_device *dev,
        goto failed_plane_init;
    }

+#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
+   if (pResInfo->caps.supportsInputColorSpace &&
+       pResInfo->caps.supportsInputColorRange) {
+
+       nv_plane->supportsColorProperties = true;
+
+       drm_plane_create_color_properties(
+           plane,
+           NVBIT(DRM_COLOR_YCBCR_BT601) |
+           NVBIT(DRM_COLOR_YCBCR_BT709) |
+           NVBIT(DRM_COLOR_YCBCR_BT2020),
+           NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
+           NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
+           DRM_COLOR_YCBCR_BT709,
+           DRM_COLOR_YCBCR_FULL_RANGE
+       );
+   } else {
+       nv_plane->supportsColorProperties = false;
+   }
+#else
+   nv_plane->supportsColorProperties = false;
+#endif
+
    drm_plane_helper_add(plane, &nv_plane_helper_funcs);

    if (plane_type != DRM_PLANE_TYPE_CURSOR) {

@@ -191,6 +191,13 @@ struct nv_drm_plane {
     */
    uint32_t layer_idx;

+   /**
+    * @supportsColorProperties
+    *
+    * If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
+    */
+   bool supportsColorProperties;
+
    struct NvKmsLUTCaps ilut_caps;
    struct NvKmsLUTCaps tmo_caps;
};
@@ -203,10 +210,23 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
    return container_of(plane, struct nv_drm_plane, base);
}

-struct nv_drm_lut_surface {
+struct nv_drm_nvkms_surface {
    struct NvKmsKapiDevice *pDevice;
    struct NvKmsKapiMemory *nvkms_memory;
    struct NvKmsKapiSurface *nvkms_surface;
+   void *buffer;
+   struct kref refcount;
+};
+
+struct nv_drm_nvkms_surface_params {
+   NvU32 width;
+   NvU32 height;
+   size_t surface_size;
+   enum NvKmsSurfaceMemoryFormat format;
+};
+
+struct nv_drm_lut_surface {
+   struct nv_drm_nvkms_surface base;
    struct {
        NvU32 vssSegments;
        enum NvKmsLUTVssType vssType;
@@ -215,14 +235,12 @@ struct nv_drm_lut_surface {
        enum NvKmsLUTFormat entryFormat;

    } properties;
-   void *buffer;
-   struct kref refcount;
};

struct nv_drm_plane_state {
    struct drm_plane_state base;
    s32 __user *fd_user_ptr;
-   enum NvKmsInputColorSpace input_colorspace;
+   enum nv_drm_input_color_space input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
    struct drm_property_blob *hdr_output_metadata;
#endif

@@ -35,6 +35,8 @@
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-gem-dma-buf.h"
+#include "nvidia-drm-utils.h"
+#include "nv_dpy_id.h"

#if defined(NV_DRM_AVAILABLE)

@@ -90,6 +92,7 @@

#include <linux/pci.h>
#include <linux/workqueue.h>
+#include <linux/sort.h>

/*
 * Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h")
@@ -120,15 +123,15 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);

static struct nv_drm_device *dev_list = NULL;

-static char* nv_get_input_colorspace_name(
-   enum NvKmsInputColorSpace colorSpace)
+static const char* nv_get_input_colorspace_name(
+   enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
-       case NVKMS_INPUT_COLORSPACE_NONE:
+       case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return "None";
-       case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
+       case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return "scRGB Linear FP16";
-       case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
+       case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return "BT.2100 PQ";
        default:
            /* We shouldn't hit this */
@@ -284,6 +287,123 @@ done:
    mutex_unlock(&nv_dev->lock);
}

struct nv_drm_mst_display_info {
    NvKmsKapiDisplay handle;
    NvBool isDpMST;
    char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH];
};

/*
 * Helper function to get DP MST display info.
 * dpMSTDisplayInfos is allocated dynamically, so the caller must free it
 * once the query is finished.
 */
static int nv_drm_get_mst_display_infos
(
    struct nv_drm_device *nv_dev,
    NvKmsKapiDisplay hDisplay,
    struct nv_drm_mst_display_info **dpMSTDisplayInfos,
    NvU32 *nDynamicDisplays
)
{
    struct NvKmsKapiStaticDisplayInfo *displayInfo = NULL;
    struct NvKmsKapiStaticDisplayInfo *dynamicDisplayInfo = NULL;
    struct NvKmsKapiConnectorInfo *connectorInfo = NULL;
    struct nv_drm_mst_display_info *displayInfos = NULL;
    NvU32 i = 0;
    int ret = 0;
    NVDpyId dpyId;
    *nDynamicDisplays = 0;

    /* Query NvKmsKapiStaticDisplayInfo and NvKmsKapiConnectorInfo */

    if ((displayInfo = nv_drm_calloc(1, sizeof(*displayInfo))) == NULL) {
        ret = -ENOMEM;
        goto done;
    }

    if ((dynamicDisplayInfo = nv_drm_calloc(1, sizeof(*dynamicDisplayInfo))) == NULL) {
        ret = -ENOMEM;
        goto done;
    }

    if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice, hDisplay, displayInfo)) {
        ret = -EINVAL;
        goto done;
    }

    connectorInfo = nvkms_get_connector_info(nv_dev->pDevice,
                                             displayInfo->connectorHandle);

    if (IS_ERR(connectorInfo)) {
        ret = PTR_ERR(connectorInfo);
        goto done;
    }


    *nDynamicDisplays = nvCountDpyIdsInDpyIdList(connectorInfo->dynamicDpyIdList);

    if (*nDynamicDisplays == 0) {
        goto done;
    }

    if ((displayInfos = nv_drm_calloc(*nDynamicDisplays, sizeof(*displayInfos))) == NULL) {
        ret = -ENOMEM;
        goto done;
    }

    FOR_ALL_DPY_IDS(dpyId, connectorInfo->dynamicDpyIdList) {
        if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice,
                                         nvDpyIdToNvU32(dpyId),
                                         dynamicDisplayInfo)) {
            ret = -EINVAL;
            nv_drm_free(displayInfos);
            goto done;
        }

        displayInfos[i].handle = dynamicDisplayInfo->handle;
        displayInfos[i].isDpMST = dynamicDisplayInfo->isDpMST;
        memcpy(displayInfos[i].dpAddress, dynamicDisplayInfo->dpAddress, sizeof(dynamicDisplayInfo->dpAddress));

        i++;
    }

    *dpMSTDisplayInfos = displayInfos;

done:

    nv_drm_free(displayInfo);

    nv_drm_free(dynamicDisplayInfo);

    nv_drm_free(connectorInfo);

    return ret;
}
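
The caller owns the returned array and must release it with nv_drm_free(); a condensed usage sketch, mirroring the enumeration loop added later in this change:

struct nv_drm_mst_display_info *infos = NULL;
NvU32 nDynamicDisplays = 0;

if (nv_drm_get_mst_display_infos(nv_dev, hDisplays[i],
                                 &infos, &nDynamicDisplays) == 0 &&
    nDynamicDisplays != 0) {
    /* ... consume infos[0 .. nDynamicDisplays-1] ... */
    nv_drm_free(infos);  /* the caller frees the array */
}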

static int nv_drm_disp_cmp(const void *l, const void *r)
{
    struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
    struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;

    return strcmp(l_info->dpAddress, r_info->dpAddress);
}

/*
 * Helper function to sort dynamic displays by dpAddress, compared as
 * strings. The sort exists to make the order in which DRM connector IDs
 * are created deterministic; the ordering is lexicographic, not numeric.
 */
static void nv_drm_sort_dynamic_displays_by_dp_addr
(
    struct nv_drm_mst_display_info *infos,
    int nDynamicDisplays
)
{
    sort(infos, nDynamicDisplays, sizeof(*infos), nv_drm_disp_cmp, NULL);
}
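
Because the comparison is strcmp()-based, multi-digit branch numbers sort lexicographically rather than numerically; a small standalone demonstration with hypothetical DP addresses:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int cmp_str(const void *l, const void *r)
{
    return strcmp(*(const char * const *)l, *(const char * const *)r);
}

int main(void)
{
    /* Hypothetical DP MST relative addresses. */
    const char *addr[] = { "1.2", "1.10", "1.1" };

    qsort(addr, 3, sizeof(addr[0]), cmp_str);
    printf("%s %s %s\n", addr[0], addr[1], addr[2]);  /* 1.1 1.10 1.2 */
    return 0;
}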

/*
 * Helper function to initialize drm_device::mode_config from
 * NvKmsKapiDevice's resource information.
@@ -365,9 +485,11 @@ static void nv_drm_enumerate_encoders_and_connectors
            nv_dev,
            "Failed to enumerate NvKmsKapiDisplay handles");
    } else {
-       NvU32 i;
+       NvU32 i, j;
+       NvU32 nDynamicDisplays = 0;

        for (i = 0; i < nDisplays; i++) {
+           struct nv_drm_mst_display_info *displayInfos = NULL;
            struct drm_encoder *encoder =
                nv_drm_add_encoder(dev, hDisplays[i]);

@@ -377,6 +499,34 @@ static void nv_drm_enumerate_encoders_and_connectors
                    "Failed to add connector for NvKmsKapiDisplay 0x%08x",
                    hDisplays[i]);
            }

+           if (nv_drm_get_mst_display_infos(nv_dev, hDisplays[i],
+                                            &displayInfos, &nDynamicDisplays)) {
+               NV_DRM_DEV_LOG_ERR(
+                   nv_dev,
+                   "Failed to get dynamic displays");
+           } else if (nDynamicDisplays) {
+               nv_drm_sort_dynamic_displays_by_dp_addr(displayInfos, nDynamicDisplays);
+
+               for (j = 0; j < nDynamicDisplays; j++) {
+                   if (displayInfos[j].isDpMST) {
+                       struct drm_encoder *mst_encoder =
+                           nv_drm_add_encoder(dev, displayInfos[j].handle);
+
+                       NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "found DP MST port display handle %u",
+                                               displayInfos[j].handle);
+
+                       if (IS_ERR(mst_encoder)) {
+                           NV_DRM_DEV_LOG_ERR(
+                               nv_dev,
+                               "Failed to add connector for NvKmsKapiDisplay 0x%08x",
+                               displayInfos[j].handle);
+                       }
+                   }
+               }
+
+               nv_drm_free(displayInfos);
+           }
        }
    }
}

@@ -602,6 +752,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
    memset(&allocateDeviceParams, 0, sizeof(allocateDeviceParams));

    allocateDeviceParams.gpuId = nv_dev->gpu_info.gpu_id;
+   allocateDeviceParams.migDevice = nv_dev->gpu_mig_device;

    allocateDeviceParams.privateData = nv_dev;
    allocateDeviceParams.eventCallback = nv_drm_event_callback;
@@ -672,6 +823,9 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)

    nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;

+   nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
+   nv_dev->vtFbSize = resInfo.vtFbSize;
+
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
    gen = nv_dev->pageKindGeneration;
    kind = nv_dev->genericPageKind;
@@ -855,6 +1009,62 @@ static void nv_drm_master_set(struct drm_device *dev,
}
#endif

static
int nv_drm_reset_input_colorspace(struct drm_device *dev)
{
    struct drm_atomic_state *state;
    struct drm_plane_state *plane_state;
    struct drm_plane *plane;
    struct nv_drm_plane_state *nv_drm_plane_state;
    struct drm_modeset_acquire_ctx ctx;
    int ret = 0;
    bool do_reset = false;
    NvU32 flags = 0;

    state = drm_atomic_state_alloc(dev);
    if (!state)
        return -ENOMEM;

#if defined(DRM_MODESET_ACQUIRE_INTERRUPTIBLE)
    flags |= DRM_MODESET_ACQUIRE_INTERRUPTIBLE;
#endif
    drm_modeset_acquire_init(&ctx, flags);
    state->acquire_ctx = &ctx;

    nv_drm_for_each_plane(plane, dev) {
        plane_state = drm_atomic_get_plane_state(state, plane);
        if (IS_ERR(plane_state)) {
            ret = PTR_ERR(plane_state);
            goto out;
        }

        nv_drm_plane_state = to_nv_drm_plane_state(plane_state);
        if (nv_drm_plane_state) {
            if (nv_drm_plane_state->input_colorspace != NV_DRM_INPUT_COLOR_SPACE_NONE) {
                nv_drm_plane_state->input_colorspace = NV_DRM_INPUT_COLOR_SPACE_NONE;
                do_reset = true;
            }
        }
    }

    if (do_reset) {
        ret = drm_atomic_commit(state);
    }

out:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
    drm_atomic_state_put(state);
#else
    // On success, drm_atomic_commit() takes care of cleaning up and freeing the state.
    if (ret != 0) {
        drm_atomic_state_free(state);
    }
#endif
    drm_modeset_drop_locks(&ctx);
    drm_modeset_acquire_fini(&ctx);

    return ret;
}

#if defined(NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG)
static
@@ -898,6 +1108,12 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
    drm_modeset_unlock_all(dev);

    nvKms->releaseOwnership(nv_dev->pDevice);
+   } else {
+       int err = nv_drm_reset_input_colorspace(dev);
+       if (err != 0) {
+           NV_DRM_DEV_LOG_WARN(nv_dev,
+               "nv_drm_reset_input_colorspace failed with error code: %d!", err);
+       }
    }
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
@@ -935,6 +1151,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
    }

    params->gpu_id = nv_dev->gpu_info.gpu_id;
+   params->mig_device = nv_dev->gpu_mig_device;
    params->primary_index = dev->primary->index;
    params->supports_alloc = false;
    params->generic_page_kind = 0;
@@ -1725,7 +1942,7 @@ static const struct file_operations nv_drm_fops = {

    .llseek = noop_llseek,

-#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
+#if defined(FOP_UNSIGNED_OFFSET)
    .fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
@@ -1967,16 +2184,16 @@ void nv_drm_update_drm_driver_features(void)
/*
 * Helper function to allocate and register a DRM device for the given NVIDIA GPU ID.
 */
-void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
+void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
{
    struct nv_drm_device *nv_dev = NULL;
    struct drm_device *dev = NULL;
-   struct device *device = gpu_info->os_device_ptr;
+   struct device *device = gpu_info->gpuInfo.os_device_ptr;
    bool bus_is_pci;

    DRM_DEBUG(
        "Registering device for NVIDIA GPU ID 0x%08x",
-       gpu_info->gpu_id);
+       gpu_info->gpuInfo.gpu_id);

    /* Allocate NVIDIA-DRM device */

@@ -1988,7 +2205,8 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
        return;
    }

-   nv_dev->gpu_info = *gpu_info;
+   nv_dev->gpu_info = gpu_info->gpuInfo;
+   nv_dev->gpu_mig_device = gpu_info->migDevice;

#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
    mutex_init(&nv_dev->lock);
@@ -2045,9 +2263,30 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
        aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
#endif
        nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
+   } else {
+       resource_size_t base = (resource_size_t) nv_dev->vtFbBaseAddress;
+       resource_size_t size = (resource_size_t) nv_dev->vtFbSize;
+
+       if (base > 0 && size > 0) {
+#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT)
+
+#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG)
+           drm_aperture_remove_conflicting_framebuffers(base, size, false, &nv_drm_driver);
+#elif defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG)
+           drm_aperture_remove_conflicting_framebuffers(base, size, &nv_drm_driver);
+#else
+           drm_aperture_remove_conflicting_framebuffers(base, size, false, nv_drm_driver.name);
+#endif
+
+#elif defined(NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT)
+           aperture_remove_conflicting_devices(base, size, nv_drm_driver.name);
+#endif
+       } else {
+           NV_DRM_DEV_LOG_INFO(nv_dev, "Invalid framebuffer console info");
+       }
    }
#if defined(NV_DRM_CLIENT_AVAILABLE)
-   drm_client_setup(dev, NULL);
+   drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
    drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
@@ -2078,7 +2317,7 @@ failed_drm_alloc:
#if defined(NV_LINUX)
int nv_drm_probe_devices(void)
{
-   nv_gpu_info_t *gpu_info = NULL;
+   struct NvKmsKapiGpuInfo *gpu_info = NULL;
    NvU32 gpu_count = 0;
    NvU32 i;


@@ -27,13 +27,15 @@

#if defined(NV_DRM_AVAILABLE)

+struct NvKmsKapiGpuInfo;
+
int nv_drm_probe_devices(void);

void nv_drm_remove_devices(void);

void nv_drm_suspend_resume(NvBool suspend);

-void nv_drm_register_drm_device(const nv_gpu_info_t *);
+void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *);

void nv_drm_update_drm_driver_features(void);


@@ -319,7 +319,7 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
    nv_encoder = get_nv_encoder_from_nvkms_display(dev, hDisplay);

    if (nv_encoder != NULL) {
-       NV_DRM_DEV_LOG_ERR(
+       NV_DRM_DEV_LOG_INFO(
            nv_dev,
            "Encoder with NvKmsKapiDisplay 0x%08x already exists.",
            hDisplay);

@@ -202,6 +202,43 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
        params.explicit_layout = false;
    }

    /*
     * XXX work around an invalid pitch assumption in DRM.
     *
     * The smallest pitch the display hardware allows is 256.
     *
     * If a DRM client allocates a 32x32 cursor surface through
     * DRM_IOCTL_MODE_CREATE_DUMB, we'll correctly round the pitch to 256:
     *
     *   pitch = round(32 (width) * 4 (Bpp), 256) = 256
     *
     * and then allocate an 8k surface:
     *
     *   size = pitch * 32 (height) = 8192
     *
     * and report the rounded pitch and size back to the client through the
     * struct drm_mode_create_dumb ioctl params.
     *
     * But when the DRM client passes that buffer object handle to
     * DRM_IOCTL_MODE_CURSOR, the client has no way to specify the pitch. This
     * path in drm:
     *
     *   DRM_IOCTL_MODE_CURSOR
     *     drm_mode_cursor_ioctl()
     *       drm_mode_cursor_common()
     *         drm_mode_cursor_universal()
     *
     * will implicitly create a framebuffer from the buffer object, and compute
     * the pitch as width x 32 (without aligning to our minimum pitch).
     *
     * Intercept this case and force the pitch back to 256.
     */
    if ((params.width == 32) &&
        (params.height == 32) &&
        (params.planes[0].pitch == 128)) {
        params.planes[0].pitch = 256;
    }
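
A quick sanity check of the arithmetic in the comment above, matching the condition and fix-up in this block:

unsigned int width  = 32, height = 32;
unsigned int cpp    = 4;                          /* 4 bytes per pixel (32 bpp) */
unsigned int pitch  = width * cpp;                /* 128: what DRM computes */
unsigned int fixed  = (pitch + 255) & ~255u;      /* 256: the hardware minimum */
unsigned int size   = fixed * height;             /* 8192 bytes, the "8k" surface */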

    /* Create NvKmsKapiSurface */

    nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, &params);

@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -1132,7 +1132,7 @@ static void __nv_drm_semsurf_fence_ctx_destroy(
     */
    nv_drm_workthread_shutdown(&ctx->worker);

-   nv_drm_del_timer_sync(&ctx->timer);
+   nv_timer_delete_sync(&ctx->timer.kernel_timer);

    /*
     * The semaphore surface could still be sending callbacks, so it is still

@@ -166,4 +166,37 @@ uint32_t *nv_drm_format_array_alloc(
    return array;
}

bool nv_drm_format_is_yuv(u32 format)
{
#if defined(NV_DRM_FORMAT_INFO_HAS_IS_YUV)
    const struct drm_format_info *format_info = drm_format_info(format);
    return (format_info != NULL) && format_info->is_yuv;
#else
    switch (format) {
        case DRM_FORMAT_YUYV:
        case DRM_FORMAT_UYVY:

        case DRM_FORMAT_NV24:
        case DRM_FORMAT_NV42:
        case DRM_FORMAT_NV16:
        case DRM_FORMAT_NV61:
        case DRM_FORMAT_NV12:
        case DRM_FORMAT_NV21:

#if defined(DRM_FORMAT_P210)
        case DRM_FORMAT_P210:
#endif
#if defined(DRM_FORMAT_P010)
        case DRM_FORMAT_P010:
#endif
#if defined(DRM_FORMAT_P012)
        case DRM_FORMAT_P012:
#endif
            return true;
        default:
            return false;
    }
#endif
}
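
nv_drm_format_is_yuv() is what gates the automatic color-property path in plane_req_config_update() earlier in this change; usage is simply:

if (nv_drm_format_is_yuv(plane_state->fb->format->format)) {
    /* YUV framebuffer: honor the plane's COLOR_ENCODING / COLOR_RANGE. */
}

On kernels that expose drm_format_info::is_yuv the kernel's own format table is authoritative; the explicit switch is only a fallback list of the YUV formats this driver exposes on older kernels.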

#endif

@@ -38,6 +38,8 @@ uint32_t *nv_drm_format_array_alloc(
    unsigned int *count,
    const long unsigned int nvkms_format_mask);

+bool nv_drm_format_is_yuv(u32 format);
+
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

#endif /* __NVIDIA_DRM_FORMAT_H__ */

@@ -308,12 +308,12 @@ static int __nv_drm_nvkms_gem_obj_init(
    nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
    nv_nvkms_memory->physically_mapped = false;

-   if (!nvKms->getMemoryPages(nv_dev->pDevice,
+   if (!nvKms->isVidmem(pMemory) &&
+       !nvKms->getMemoryPages(nv_dev->pDevice,
                               pMemory,
                               &pages,
-                              &numPages) &&
-       !nvKms->isVidmem(pMemory)) {
-       /* GetMemoryPages may fail for vidmem allocations,
+                              &numPages)) {
+       /* GetMemoryPages will fail for vidmem allocations,
         * but it should not fail for sysmem allocations. */
        NV_DRM_DEV_LOG_ERR(nv_dev,
            "Failed to get memory pages for NvKmsKapiMemory 0x%p",

@@ -69,6 +69,13 @@

#endif //NV_DRM_ROTATION_AVAILABLE

+/*
+ * Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
+ * accommodate new color props") in Linux v6.8 increased the per-object
+ * property limit from 24 to 64.
+ */
+#define NV_DRM_USE_EXTENDED_PROPERTIES (DRM_OBJECT_MAX_PROPERTY >= 64)
+
/*
 * drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
 * (2017-09-26) and drm_dev_unref() is removed by

@@ -182,6 +182,7 @@ struct drm_nvidia_gem_import_userspace_memory_params {

struct drm_nvidia_get_dev_info_params {
    uint32_t gpu_id;        /* OUT */
+   uint32_t mig_device;    /* OUT */
    uint32_t primary_index; /* OUT; the "card%d" value */

    uint32_t supports_alloc; /* OUT */

@@ -677,6 +677,33 @@ int nv_drm_atomic_commit(struct drm_device *dev,
                    "Flip event timeout on head %u", nv_crtc->head);
            }
        }

+#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
+       /*
+        * If the legacy LUT needs to be updated, ensure that the previous LUT
+        * update is complete first.
+        */
+       if (crtc_state->color_mgmt_changed) {
+           NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
+                                                     nv_crtc->head,
+                                                     !nonblock /* waitForCompletion */);
+
+           /* If checking the LUT notifier failed, assume no LUT notifier is set. */
+           if (!complete) {
+               if (nonblock) {
+                   return -EBUSY;
+               } else {
+                   /*
+                    * checkLutNotifier should wait on the notifier in this
+                    * case, so we should only get here if the wait timed out.
+                    */
+                   NV_DRM_DEV_LOG_ERR(
+                       nv_dev,
+                       "LUT notifier timeout on head %u", nv_crtc->head);
+               }
+           }
+       }
+#endif
    }

#if defined(NV_DRM_ATOMIC_HELPER_SWAP_STATE_HAS_STALL_ARG)
@@ -803,6 +830,19 @@ int nv_drm_atomic_commit(struct drm_device *dev,
                __nv_drm_handle_flip_event(nv_crtc);
            }
        }

+#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
+       if (crtc_state->color_mgmt_changed) {
+           NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
+                                                     nv_crtc->head,
+                                                     true /* waitForCompletion */);
+           if (!complete) {
+               NV_DRM_DEV_LOG_ERR(
+                   nv_dev,
+                   "LUT notifier timeout on head %u", nv_crtc->head);
+           }
+       }
+#endif
    }
}


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -236,15 +236,6 @@ unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
    return jiffies + msecs_to_jiffies(relative_timeout_ms);
}

-bool nv_drm_del_timer_sync(nv_drm_timer *timer)
-{
-   if (del_timer_sync(&timer->kernel_timer)) {
-       return true;
-   } else {
-       return false;
-   }
-}
-
#if defined(NV_DRM_FENCE_AVAILABLE)
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{

@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -58,16 +58,6 @@ typedef struct nv_timer nv_drm_timer;
#error "Need to define kernel timer callback primitives for this OS"
#endif

-#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
-#define NV_DRM_FBDEV_AVAILABLE
-#define NV_DRM_FBDEV_GENERIC_AVAILABLE
-#endif
-
-#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
-#define NV_DRM_FBDEV_AVAILABLE
-#define NV_DRM_FBDEV_TTM_AVAILABLE
-#endif
-
struct page;

/* Set to true when the atomic modeset feature is enabled. */
@@ -111,8 +101,6 @@ void nv_drm_timer_setup(nv_drm_timer *timer,

void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long relative_timeout_ms);

-bool nv_drm_del_timer_sync(nv_drm_timer *timer);
-
unsigned long nv_drm_timer_now(void);

unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms);

@@ -85,8 +85,15 @@
        DRM_DEBUG_DRIVER("[GPU ID 0x%08x] " __fmt, \
                         __dev->gpu_info.gpu_id, ##__VA_ARGS__)

+enum nv_drm_input_color_space {
+   NV_DRM_INPUT_COLOR_SPACE_NONE,
+   NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR,
+   NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ
+};
+
struct nv_drm_device {
    nv_gpu_info_t gpu_info;
+   MIGDeviceId gpu_mig_device;

    struct drm_device *dev;

@@ -182,6 +189,9 @@ struct nv_drm_device {
    struct drm_property *nv_crtc_regamma_divisor_property;

    struct nv_drm_device *next;

+   NvU64 vtFbBaseAddress;
+   NvU64 vtFbSize;
};

static inline NvU32 nv_drm_next_display_semaphore(

@@ -37,6 +37,8 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
+NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
+NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl___vma_start_write

NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
@@ -65,6 +67,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
@@ -74,6 +77,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
@@ -133,6 +137,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
@@ -140,8 +146,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_info_has_is_yuv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg

@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2015-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -53,6 +53,7 @@
#include "nv-procfs.h"
#include "nv-kthread-q.h"
#include "nv-time.h"
+#include "nv-timer.h"
#include "nv-lock.h"
#include "nv-chardev-numbers.h"

@@ -102,6 +103,11 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

+MODULE_PARM_DESC(conceal_vrr_caps,
+                 "Conceal all display VRR capabilities");
+static bool conceal_vrr_caps = false;
+module_param_named(conceal_vrr_caps, conceal_vrr_caps, bool, 0400);
+
/* Fail allocating the RM core channel for NVKMS using the i-th method (see
 * FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
@@ -135,7 +141,12 @@ NvBool nvkms_test_fail_alloc_core_channel(

    return NV_TRUE;
}


+NvBool nvkms_conceal_vrr_caps(void)
+{
+   return conceal_vrr_caps;
+}
+
NvBool nvkms_output_rounding_fix(void)
{
    return output_rounding_fix;
@@ -738,7 +749,7 @@ static void nvkms_kthread_q_callback(void *arg)
     * pending timers and then waiting for workqueue callbacks.
     */
    if (timer->kernel_timer_created) {
-       del_timer_sync(&timer->kernel_timer);
+       nv_timer_delete_sync(&timer->kernel_timer);
    }
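
nv_timer_delete_sync() itself is not shown in this diff; presumably it is a small compat wrapper in nv-timer.h (newly included above). A hedged sketch, assuming it follows the same conftest pattern that appears verbatim in the restart path below:

/* Hypothetical sketch; the real helper presumably lives in nv-timer.h. */
static inline int nv_timer_delete_sync(struct timer_list *timer)
{
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
    /* Newer kernels export timer_delete_sync() (del_timer_sync() renamed). */
    return timer_delete_sync(timer);
#else
    return del_timer_sync(timer);
#endif
}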

    /*
@@ -1922,7 +1933,11 @@ restart:
     * completion, and we wait for queue completion with
     * nv_kthread_q_stop below.
     */
+#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
+   if (timer_delete_sync(&timer->kernel_timer) == 1) {
+#else
    if (del_timer_sync(&timer->kernel_timer) == 1) {
+#endif
        /* We've deactivated the timer, so we need to clean up after it */
        list_del(&timer->timers_list);


@@ -110,6 +110,7 @@ enum FailAllocCoreChannelMethod {
};

NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
+NvBool nvkms_conceal_vrr_caps(void);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);

@@ -52,7 +52,7 @@ nvidia-modeset-y += $(NVIDIA_MODESET_BINARY_OBJECT_O)
# Define nvidia-modeset.ko-specific CFLAGS.
#

-NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset
+NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset -I$(src)/common/inc
NVIDIA_MODESET_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0

# Some Android kernels prohibit driver use of filesystem functions like
@@ -103,4 +103,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
+NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg

@@ -453,35 +453,19 @@ typedef struct nvidia_p2p_rsync_reg_info {

/*
 * @brief
- *      Gets rsync (GEN-ID) register information associated with the supported
- *      NPUs.
- *
- *      The caller would use the returned information {GPU device, NPU device,
- *      socket-id, cluster-id} to pick the optimal generation registers to issue
- *      RSYNC (NVLink HW flush).
- *
- *      The interface allocates structures to return the information, hence
- *      nvidia_p2p_put_rsync_registers() must be called to free the structures.
- *
- *      Note, cluster-id is hardcoded to zero as early system configurations would
- *      only support cluster mode i.e. all devices would share the same cluster-id
- *      (0). In the future, appropriate kernel support would be needed to query
- *      cluster-ids.
- *
- * @param[out] reg_info
- *      A pointer to the rsync reg info structure.
+ *      This interface is no longer supported and will always return an error. It
+ *      is left in place (for now) to allow third-party callers to build without
+ *      any errors.
 *
 * @Returns
- *      0 Upon successful completion. Otherwise, returns negative value.
+ *      -ENODEV
 */
int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info);

/*
 * @brief
- *      Frees the structures allocated by nvidia_p2p_get_rsync_registers().
- *
- * @param[in] reg_info
- *      A pointer to the rsync reg info structure.
+ *      This interface is no longer supported. It is left in place (for now) to
+ *      allow third-party callers to build without any errors.
 */
void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info);

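Given the rewritten documentation, the implementations presumably collapse to stubs. A sketch consistent with the documented behavior (the actual bodies are not part of this diff):

int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info)
{
    /* No longer supported; kept only so third-party callers still build. */
    return -ENODEV;
}

void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info)
{
    /* Nothing to free: get_rsync_registers() never allocates anymore. */
}
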
@@ -1,51 +1,31 @@
/*******************************************************************************
    Copyright (c) 2012-2015 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/


#ifndef _clc86f_h_
#define _clc86f_h_

#ifdef __cplusplus
extern "C" {
#endif

#include "nvtypes.h"

/* class HOPPER_CHANNEL_GPFIFO */
/*
 * Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
 * chapter "User Control Registers". It is documented as device NV_UDMA.
 * The GPFIFO format itself is also documented in dev_pbdma.ref,
 * NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
 * chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
 * SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
 * SPDX-License-Identifier: MIT
 *
 * Note there is no .mfs file for this class.
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __gh100_clc86f_h__
#define __gh100_clc86f_h__

#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)

#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA

/* dma flow control data structure */
typedef volatile struct Nvc86fControl_struct {
    NvU32 Ignored00[0x010]; /* 0000-003f*/
    NvU32 Put; /* put offset, read/write 0040-0043*/
@@ -64,54 +44,7 @@ typedef volatile struct Nvc86fControl_struct {
    NvU32 Ignored05[0x5c];
} Nvc86fControl, HopperAControlGPFifo;

/* fields and values */
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
#define NVC86F_SET_OBJECT (0x00000000)
#define NVC86F_SET_OBJECT_NVCLASS 15:0
#define NVC86F_SET_OBJECT_ENGINE 20:16
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC86F_ILLEGAL (0x00000004)
#define NVC86F_ILLEGAL_HANDLE 31:0
#define NVC86F_NOP (0x00000008)
#define NVC86F_NOP_HANDLE 31:0
#define NVC86F_SEMAPHOREA (0x00000010)
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC86F_SEMAPHOREB (0x00000014)
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC86F_SEMAPHOREC (0x00000018)
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
#define NVC86F_SEMAPHORED (0x0000001C)
#define NVC86F_SEMAPHORED_OPERATION 4:0
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION 30:27
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC86F_SEMAPHORED_FORMAT 31:31
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC86F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
@@ -206,67 +139,31 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
#define NVC86F_SET_REFERENCE (0x00000050)
#define NVC86F_SET_REFERENCE_COUNT 31:0
#define NVC86F_SEM_ADDR_LO (0x0000005c)
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
#define NVC86F_SEM_ADDR_HI (0x00000060)
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC86F_SEM_EXECUTE (0x0000006c)
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC86F_WFI (0x00000078)
#define NVC86F_WFI_SCOPE 0:0
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC86F_WFI_SCOPE_ALL 0x00000001
#define NVC86F_YIELD (0x00000080)
#define NVC86F_YIELD_OP 1:0
#define NVC86F_YIELD_OP_NOP 0x00000000
#define NVC86F_YIELD_OP_TSG 0x00000003
#define NVC86F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC86F_QUADRO_VERIFY (0x000000a0)


/* GPFIFO entry format */
#define NVC86F_GP_ENTRY__SIZE 8
@@ -291,85 +188,4 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004

/* dma method formats */
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC86F_DMA_METHOD_ADDRESS 11:0
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC86F_DMA_TERT_OP 17:16
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
#define NVC86F_DMA_METHOD_COUNT 28:16
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_SEC_OP 31:29
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC86F_DMA_INCR_ADDRESS 11:0
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
#define NVC86F_DMA_INCR_COUNT 28:16
#define NVC86F_DMA_INCR_OPCODE 31:29
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC86F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_NONINCR_COUNT 28:16
#define NVC86F_DMA_NONINCR_OPCODE 31:29
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC86F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_ONEINCR_COUNT 28:16
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC86F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC86F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC86F_DMA_IMMD_ADDRESS 11:0
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_IMMD_OPCODE 31:29
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC86F_DMA_ADDRESS 12:2
#define NVC86F_DMA_SUBCH 15:13
#define NVC86F_DMA_OPCODE3 17:16
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
#define NVC86F_DMA_COUNT 28:18
#define NVC86F_DMA_OPCODE 31:29
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC86F_DMA_DATA 31:0

#ifdef __cplusplus
}; /* extern "C" */
#endif

#endif /* _clc86f_h_ */
#endif // __gh100_clc86f_h__
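
/*
 * A minimal sketch (not part of this header) of how the high:low bit ranges
 * above are consumed: packing one incrementing pushbuffer method header per
 * the NVC86F_DMA_INCR_* fields. The address field carries the method byte
 * offset divided by 4; nvc86f_incr_header() is a hypothetical helper.
 */
static inline NvU32 nvc86f_incr_header(NvU32 method, NvU32 subch, NvU32 count)
{
    return ((NvU32)0x1 << 29)       | /* NVC86F_DMA_INCR_OPCODE_VALUE 31:29 */
           ((count & 0x1fff) << 16) | /* NVC86F_DMA_INCR_COUNT        28:16 */
           ((subch & 0x7) << 13)    | /* NVC86F_DMA_INCR_SUBCHANNEL   15:13 */
           ((method >> 2) & 0xfff);   /* NVC86F_DMA_INCR_ADDRESS       11:0 */
}

/*
 * Usage sketch: nvc86f_incr_header(NVC86F_SEM_ADDR_LO, 0, 5), followed by the
 * five data words (address lo/hi, payload lo/hi, NVC86F_SEM_EXECUTE value),
 * releases a semaphore through the host class.
 */
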
@@ -1,160 +1,46 @@
/*******************************************************************************
    Copyright (c) 1993-2004 NVIDIA Corporation
/*
 * SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
#ifndef __gh100_clc8b5_h__
#define __gh100_clc8b5_h__

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/



#include "nvtypes.h"

#ifndef _clc8b5_h_
#define _clc8b5_h_

#ifdef __cplusplus
extern "C" {
#endif

#define HOPPER_DMA_COPY_A (0x0000C8B5)

typedef volatile struct _clc8b5_tag0 {
    NvV32 Reserved00[0x40];
    NvV32 Nop; // 0x00000100 - 0x00000103
    NvV32 Reserved01[0xF];
    NvV32 PmTrigger; // 0x00000140 - 0x00000143
    NvV32 Reserved02[0x36];
    NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
    NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
    NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
    NvV32 Reserved03[0x6];
    NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
    NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
    NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
    NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
    NvV32 Reserved04[0x1];
    NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
    NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
    NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
    NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
    NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
    NvV32 Reserved05[0x26];
    NvV32 LaunchDma; // 0x00000300 - 0x00000303
    NvV32 Reserved06[0x3F];
    NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
    NvV32 OffsetInLower; // 0x00000404 - 0x00000407
    NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
    NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
    NvV32 PitchIn; // 0x00000410 - 0x00000413
    NvV32 PitchOut; // 0x00000414 - 0x00000417
    NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
    NvV32 LineCount; // 0x0000041C - 0x0000041F
    NvV32 Reserved07[0x38];
    NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
    NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
    NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
    NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
    NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
    NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
    NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
    NvV32 Reserved08[0x5];
    NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
    NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
    NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
    NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
    NvV32 Reserved09[0x6F];
    NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
    NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
    NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
    NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
    NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
    NvV32 SetDstWidth; // 0x00000710 - 0x00000713
    NvV32 SetDstHeight; // 0x00000714 - 0x00000717
    NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
    NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
    NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
    NvV32 Reserved10[0x1];
    NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
    NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
    NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
    NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
    NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
    NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
    NvV32 Reserved11[0x1];
    NvV32 SrcOriginX; // 0x00000744 - 0x00000747
    NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
    NvV32 DstOriginX; // 0x0000074C - 0x0000074F
    NvV32 DstOriginY; // 0x00000750 - 0x00000753
    NvV32 Reserved12[0x270];
    NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
    NvV32 Reserved13[0x3BA];
} hopper_dma_copy_aControlPio;

#define NVC8B5_NOP (0x00000100)
#define NVC8B5_NOP_PARAMETER 31:0
#define NVC8B5_PM_TRIGGER (0x00000140)
#define NVC8B5_PM_TRIGGER_V 31:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define HOPPER_DMA_COPY_A (0x0000C8B5)
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -167,80 +53,41 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
@@ -249,41 +96,11 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC8B5_PITCH_IN (0x00000410)
#define NVC8B5_PITCH_IN_VALUE 31:0
#define NVC8B5_PITCH_OUT (0x00000414)
#define NVC8B5_PITCH_OUT_VALUE 31:0
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC8B5_LINE_COUNT (0x0000041C)
#define NVC8B5_LINE_COUNT_VALUE 31:0
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
@@ -299,132 +116,18 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_DST_WIDTH (0x00000710)
#define NVC8B5_SET_DST_WIDTH_V 31:0
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
#define NVC8B5_SET_DST_HEIGHT_V 31:0
#define NVC8B5_SET_DST_DEPTH (0x00000718)
#define NVC8B5_SET_DST_DEPTH_V 31:0
#define NVC8B5_SET_DST_LAYER (0x0000071C)
#define NVC8B5_SET_DST_LAYER_V 31:0
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
#define NVC8B5_SET_DST_ORIGIN_X 15:0
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
#define NVC8B5_SET_SRC_WIDTH_V 31:0
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
#define NVC8B5_SET_SRC_DEPTH_V 31:0
#define NVC8B5_SET_SRC_LAYER (0x00000738)
#define NVC8B5_SET_SRC_LAYER_V 31:0
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC8B5_PM_TRIGGER_END (0x00001114)
#define NVC8B5_PM_TRIGGER_END_V 31:0

#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc8b5_h

#endif // __gh100_clc8b5_h__
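
/*
 * A minimal sketch (not part of this header): the method sequence for a flat
 * pitch-linear virtual-to-virtual copy with HOPPER_DMA_COPY_A. push4() is a
 * hypothetical helper that writes one (method, value) pair to the
 * pushbuffer; the DATA_TRANSFER_TYPE value for bits 1:0 of LAUNCH_DMA is
 * elided above and must be taken from the full header, so it is left out.
 */
extern void push4(NvU32 method, NvU32 value);

static void nvc8b5_flat_copy(NvU64 src_va, NvU64 dst_va, NvU32 num_bytes)
{
    push4(NVC8B5_OFFSET_IN_UPPER,  (NvU32)(src_va >> 32)); /* UPPER is 24:0 */
    push4(NVC8B5_OFFSET_IN_LOWER,  (NvU32)(src_va));
    push4(NVC8B5_OFFSET_OUT_UPPER, (NvU32)(dst_va >> 32));
    push4(NVC8B5_OFFSET_OUT_LOWER, (NvU32)(dst_va));
    push4(NVC8B5_LINE_LENGTH_IN,   num_bytes);
    push4(NVC8B5_LINE_COUNT,       1);
    push4(NVC8B5_LAUNCH_DMA,
          (0x1 << 7)  | /* SRC_MEMORY_LAYOUT_PITCH  7:7  */
          (0x1 << 8)  | /* DST_MEMORY_LAYOUT_PITCH  8:8  */
          (0x0 << 9)  | /* MULTI_LINE_ENABLE_FALSE  9:9  */
          (0x0 << 12) | /* SRC_TYPE_VIRTUAL        12:12 */
          (0x0 << 13) | /* DST_TYPE_VIRTUAL        13:13 */
          (0x0 << 3));  /* SEMAPHORE_TYPE_NONE      4:3  */
}
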
@@ -1,84 +1,42 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef _clc96f_h_
|
||||
#define _clc96f_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/* class BLACKWELL_CHANNEL_GPFIFO */
|
||||
/*
|
||||
* Documentation for BLACKWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
|
||||
* chapter "User Control Registers". It is documented as device NV_UDMA.
|
||||
* The GPFIFO format itself is also documented in dev_pbdma.ref,
|
||||
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
|
||||
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Note there is no .mfs file for this class.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gb100_clc96f_h__
|
||||
#define __gb100_clc96f_h__
|
||||
|
||||
#define BLACKWELL_CHANNEL_GPFIFO_A (0x0000C96F)
|
||||
|
||||
#define NVC96F_TYPEDEF BLACKWELL_CHANNELChannelGPFifoA
|
||||
|
||||
/* dma flow control data structure */
|
||||
typedef volatile struct Nvc96fControl_struct {
|
||||
NvU32 Ignored00[0x23]; /* 0000-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored01[0x5c];
|
||||
} Nvc96fControl, BlackwellAControlGPFifo;
|
||||
|
||||
/* fields and values */
|
||||
#define NVC96F_NUMBER_OF_SUBCHANNELS (8)
|
||||
#define NVC96F_SET_OBJECT (0x00000000)
|
||||
#define NVC96F_SET_OBJECT_NVCLASS 15:0
|
||||
#define NVC96F_SET_OBJECT_ENGINE 20:16
|
||||
#define NVC96F_SET_OBJECT_ENGINE_SW 0x0000001f
|
||||
#define NVC96F_NOP (0x00000008)
|
||||
#define NVC96F_NOP_HANDLE 31:0
|
||||
#define NVC96F_NON_STALL_INTERRUPT (0x00000020)
|
||||
#define NVC96F_NON_STALL_INTERRUPT_HANDLE 31:0
|
||||
#define NVC96F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
|
||||
#define NVC96F_FB_FLUSH_HANDLE 31:0
|
||||
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
|
||||
// specifying the page address for a targeted TLB invalidate and the uTLB for
|
||||
// a targeted REPLAY_CANCEL for UVM.
|
||||
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
|
||||
// rearranged fields.
|
||||
#define NVC96F_MEM_OP_A (0x00000028)
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
|
||||
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
|
||||
@@ -86,9 +44,6 @@ typedef volatile struct Nvc96fControl_struct {
|
||||
#define NVC96F_MEM_OP_B (0x0000002c)
|
||||
#define NVC96F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
|
||||
#define NVC96F_MEM_OP_C (0x00000030)
|
||||
#define NVC96F_MEM_OP_C_MEMBAR_TYPE 2:0
|
||||
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
|
||||
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
|
||||
@@ -97,130 +52,38 @@ typedef volatile struct Nvc96fControl_struct {
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
|
||||
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
|
||||
#define NVC96F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
|
||||
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
|
||||
|
||||
#define NVC96F_MEM_OP_D (0x00000034)
|
||||
#define NVC96F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
|
||||
#define NVC96F_MEM_OP_D_OPERATION 31:27
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
|
||||
#define NVC96F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
|
||||
// CLEAN_LINES is an alias for Tegra/GPU IP usage
|
||||
#define NVC96F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE 0x00000011
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_COH_INVALIDATE 0x00000012
|
||||
#define NVC96F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
|
||||
#define NVC96F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
|
||||
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
|
||||
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
|
||||
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
|
||||
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
|
||||
|
||||
#define NVC96F_SEM_ADDR_LO (0x0000005c)
|
||||
#define NVC96F_SEM_ADDR_LO_OFFSET 31:2
|
||||
#define NVC96F_SEM_ADDR_HI (0x00000060)
|
||||
#define NVC96F_SEM_ADDR_HI_OFFSET 24:0
|
||||
#define NVC96F_SEM_PAYLOAD_LO (0x00000064)
|
||||
#define NVC96F_SEM_PAYLOAD_LO_PAYLOAD 31:0
|
||||
#define NVC96F_SEM_PAYLOAD_HI (0x00000068)
|
||||
#define NVC96F_SEM_PAYLOAD_HI_PAYLOAD 31:0
|
||||
#define NVC96F_SEM_EXECUTE (0x0000006c)
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION 2:0
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
|
||||
#define NVC96F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK 18:18
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_EN 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_WFI 20:20
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
|
||||
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION 30:27
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_INC 0x00000006
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
|
||||
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
|
||||
#define NVC96F_WFI (0x00000078)
|
||||
#define NVC96F_WFI_SCOPE 0:0
|
||||
#define NVC96F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
|
||||
#define NVC96F_WFI_SCOPE_CURRENT_VEID 0x00000000
|
||||
#define NVC96F_WFI_SCOPE_ALL 0x00000001
|
||||
#define NVC96F_YIELD (0x00000080)
|
||||
#define NVC96F_YIELD_OP 1:0
|
||||
#define NVC96F_YIELD_OP_NOP 0x00000000
|
||||
#define NVC96F_YIELD_OP_TSG 0x00000003
|
||||
#define NVC96F_CLEAR_FAULTED (0x00000084)
|
||||
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
|
||||
// are intentionally not exposed to the driver through these defines.
|
||||
#define NVC96F_CLEAR_FAULTED_HANDLE 30:0
|
||||
#define NVC96F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC96F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC96F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001

/* GPFIFO entry format */
#define NVC96F_GP_ENTRY__SIZE                                      8
@@ -245,85 +108,4 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_GP_ENTRY1_OPCODE_PB_CRC                             0x00000003
#define NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE       0x00000004

/* dma method formats */
#define NVC96F_DMA_METHOD_ADDRESS_OLD                              12:2
#define NVC96F_DMA_METHOD_ADDRESS                                  11:0
#define NVC96F_DMA_SUBDEVICE_MASK                                  15:4
#define NVC96F_DMA_METHOD_SUBCHANNEL                               15:13
#define NVC96F_DMA_TERT_OP                                         17:16
#define NVC96F_DMA_TERT_OP_GRP0_INC_METHOD                         (0x00000000)
#define NVC96F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK                   (0x00000001)
#define NVC96F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK                 (0x00000002)
#define NVC96F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK                   (0x00000003)
#define NVC96F_DMA_TERT_OP_GRP2_NON_INC_METHOD                     (0x00000000)
#define NVC96F_DMA_METHOD_COUNT_OLD                                28:18
#define NVC96F_DMA_METHOD_COUNT                                    28:16
#define NVC96F_DMA_IMMD_DATA                                       28:16
#define NVC96F_DMA_SEC_OP                                          31:29
#define NVC96F_DMA_SEC_OP_GRP0_USE_TERT                            (0x00000000)
#define NVC96F_DMA_SEC_OP_INC_METHOD                               (0x00000001)
#define NVC96F_DMA_SEC_OP_GRP2_USE_TERT                            (0x00000002)
#define NVC96F_DMA_SEC_OP_NON_INC_METHOD                           (0x00000003)
#define NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD                         (0x00000004)
#define NVC96F_DMA_SEC_OP_ONE_INC                                  (0x00000005)
#define NVC96F_DMA_SEC_OP_RESERVED6                                (0x00000006)
#define NVC96F_DMA_SEC_OP_END_PB_SEGMENT                           (0x00000007)
/* dma incrementing method format */
#define NVC96F_DMA_INCR_ADDRESS                                    11:0
#define NVC96F_DMA_INCR_SUBCHANNEL                                 15:13
#define NVC96F_DMA_INCR_COUNT                                      28:16
#define NVC96F_DMA_INCR_OPCODE                                     31:29
#define NVC96F_DMA_INCR_OPCODE_VALUE                               (0x00000001)
#define NVC96F_DMA_INCR_DATA                                       31:0
/* dma non-incrementing method format */
#define NVC96F_DMA_NONINCR_ADDRESS                                 11:0
#define NVC96F_DMA_NONINCR_SUBCHANNEL                              15:13
#define NVC96F_DMA_NONINCR_COUNT                                   28:16
#define NVC96F_DMA_NONINCR_OPCODE                                  31:29
#define NVC96F_DMA_NONINCR_OPCODE_VALUE                            (0x00000003)
#define NVC96F_DMA_NONINCR_DATA                                    31:0
/* dma increment-once method format */
#define NVC96F_DMA_ONEINCR_ADDRESS                                 11:0
#define NVC96F_DMA_ONEINCR_SUBCHANNEL                              15:13
#define NVC96F_DMA_ONEINCR_COUNT                                   28:16
#define NVC96F_DMA_ONEINCR_OPCODE                                  31:29
#define NVC96F_DMA_ONEINCR_OPCODE_VALUE                            (0x00000005)
#define NVC96F_DMA_ONEINCR_DATA                                    31:0
/* dma no-operation format */
#define NVC96F_DMA_NOP                                             (0x00000000)
/* dma immediate-data format */
#define NVC96F_DMA_IMMD_ADDRESS                                    11:0
#define NVC96F_DMA_IMMD_SUBCHANNEL                                 15:13
#define NVC96F_DMA_IMMD_DATA                                       28:16
#define NVC96F_DMA_IMMD_OPCODE                                     31:29
#define NVC96F_DMA_IMMD_OPCODE_VALUE                               (0x00000004)
/* dma set sub-device mask format */
#define NVC96F_DMA_SET_SUBDEVICE_MASK_VALUE                        15:4
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE                       31:16
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000001)
/* dma store sub-device mask format */
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_VALUE                      15:4
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE                     31:16
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE               (0x00000002)
/* dma use sub-device mask format */
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE                       31:16
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000003)
/* dma end-segment format */
#define NVC96F_DMA_ENDSEG_OPCODE                                   31:29
#define NVC96F_DMA_ENDSEG_OPCODE_VALUE                             (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC96F_DMA_ADDRESS                                         12:2
#define NVC96F_DMA_SUBCH                                           15:13
#define NVC96F_DMA_OPCODE3                                         17:16
#define NVC96F_DMA_OPCODE3_NONE                                    (0x00000000)
#define NVC96F_DMA_COUNT                                           28:18
#define NVC96F_DMA_OPCODE                                          31:29
#define NVC96F_DMA_OPCODE_METHOD                                   (0x00000000)
#define NVC96F_DMA_OPCODE_NONINC_METHOD                            (0x00000002)
#define NVC96F_DMA_DATA                                            31:0
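// Illustrative sketch (not part of the class definition): encoding the header
// word of an incrementing method from the NVC96F_DMA_INCR_* fields above. The
// helper name is hypothetical; real code uses the DRF macros, and `addr` is
// the dword method address (byte offset >> 2).
static inline NvU32 nvc96f_dma_incr_header_sketch(NvU32 addr, NvU32 subch, NvU32 count)
{
    // ADDRESS 11:0, SUBCHANNEL 15:13, COUNT 28:16, OPCODE 31:29 (INC == 1).
    return (addr & 0xFFFU) |
           ((subch & 0x7U) << 13) |
           ((count & 0x1FFFU) << 16) |
           ((NvU32)NVC96F_DMA_INCR_OPCODE_VALUE << 29);
}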

#ifdef __cplusplus
}; /* extern "C" */
#endif

#endif /* _clc96f_h_ */
#endif // __gb100_clc96f_h__

@@ -1,460 +1,29 @@

/*******************************************************************************
    Copyright (c) 1993-2004 NVIDIA Corporation
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "nvtypes.h"

#ifndef _clc9b5_h_
#define _clc9b5_h_

#ifdef __cplusplus
extern "C" {
#endif
#ifndef __gb100_clc9b5_h__
#define __gb100_clc9b5_h__

#define BLACKWELL_DMA_COPY_A                                            (0x0000C9B5)

typedef volatile struct _clc9b5_tag0 {
    NvV32 Reserved00[0x40];
    NvV32 Nop;                                          // 0x00000100 - 0x00000103
    NvV32 Reserved01[0xF];
    NvV32 PmTrigger;                                    // 0x00000140 - 0x00000143
    NvV32 Reserved02[0x36];
    NvV32 SetMonitoredFenceType;                        // 0x0000021C - 0x0000021F
    NvV32 SetMonitoredFenceSignalAddrBaseUpper;         // 0x00000220 - 0x00000223
    NvV32 SetMonitoredFenceSignalAddrBaseLower;         // 0x00000224 - 0x00000227
    NvV32 Reserved03[0x6];
    NvV32 SetSemaphoreA;                                // 0x00000240 - 0x00000243
    NvV32 SetSemaphoreB;                                // 0x00000244 - 0x00000247
    NvV32 SetSemaphorePayload;                          // 0x00000248 - 0x0000024B
    NvV32 SetSemaphorePayloadUpper;                     // 0x0000024C - 0x0000024F
    NvV32 Reserved04[0x1];
    NvV32 SetRenderEnableA;                             // 0x00000254 - 0x00000257
    NvV32 SetRenderEnableB;                             // 0x00000258 - 0x0000025B
    NvV32 SetRenderEnableC;                             // 0x0000025C - 0x0000025F
    NvV32 SetSrcPhysMode;                               // 0x00000260 - 0x00000263
    NvV32 SetDstPhysMode;                               // 0x00000264 - 0x00000267
    NvV32 Reserved05[0x26];
    NvV32 LaunchDma;                                    // 0x00000300 - 0x00000303
    NvV32 Reserved06[0x3F];
    NvV32 OffsetInUpper;                                // 0x00000400 - 0x00000403
    NvV32 OffsetInLower;                                // 0x00000404 - 0x00000407
    NvV32 OffsetOutUpper;                               // 0x00000408 - 0x0000040B
    NvV32 OffsetOutLower;                               // 0x0000040C - 0x0000040F
    NvV32 PitchIn;                                      // 0x00000410 - 0x00000413
    NvV32 PitchOut;                                     // 0x00000414 - 0x00000417
    NvV32 LineLengthIn;                                 // 0x00000418 - 0x0000041B
    NvV32 LineCount;                                    // 0x0000041C - 0x0000041F
    NvV32 Reserved07[0x38];
    NvV32 SetSecureCopyMode;                            // 0x00000500 - 0x00000503
    NvV32 SetDecryptIv0;                                // 0x00000504 - 0x00000507
    NvV32 SetDecryptIv1;                                // 0x00000508 - 0x0000050B
    NvV32 SetDecryptIv2;                                // 0x0000050C - 0x0000050F
    NvV32 Reserved_SetAESCounter;                       // 0x00000510 - 0x00000513
    NvV32 SetDecryptAuthTagCompareAddrUpper;            // 0x00000514 - 0x00000517
    NvV32 SetDecryptAuthTagCompareAddrLower;            // 0x00000518 - 0x0000051B
    NvV32 Reserved08[0x5];
    NvV32 SetEncryptAuthTagAddrUpper;                   // 0x00000530 - 0x00000533
    NvV32 SetEncryptAuthTagAddrLower;                   // 0x00000534 - 0x00000537
    NvV32 SetEncryptIvAddrUpper;                        // 0x00000538 - 0x0000053B
    NvV32 SetEncryptIvAddrLower;                        // 0x0000053C - 0x0000053F
    NvV32 Reserved09[0x10];
    NvV32 SetCompressionParameters;                     // 0x00000580 - 0x00000583
    NvV32 SetDecompressOutLength;                       // 0x00000584 - 0x00000587
    NvV32 SetDecompressOutLengthAddrUpper;              // 0x00000588 - 0x0000058B
    NvV32 SetDecompressOutLengthAddrLower;              // 0x0000058C - 0x0000058F
    NvV32 SetDecompressChecksum;                        // 0x00000590 - 0x00000593
    NvV32 Reserved10[0x5A];
    NvV32 SetMemoryScrubParameters;                     // 0x000006FC - 0x000006FF
    NvV32 SetRemapConstA;                               // 0x00000700 - 0x00000703
    NvV32 SetRemapConstB;                               // 0x00000704 - 0x00000707
    NvV32 SetRemapComponents;                           // 0x00000708 - 0x0000070B
    NvV32 SetDstBlockSize;                              // 0x0000070C - 0x0000070F
    NvV32 SetDstWidth;                                  // 0x00000710 - 0x00000713
    NvV32 SetDstHeight;                                 // 0x00000714 - 0x00000717
    NvV32 SetDstDepth;                                  // 0x00000718 - 0x0000071B
    NvV32 SetDstLayer;                                  // 0x0000071C - 0x0000071F
    NvV32 SetDstOrigin;                                 // 0x00000720 - 0x00000723
    NvV32 Reserved11[0x1];
    NvV32 SetSrcBlockSize;                              // 0x00000728 - 0x0000072B
    NvV32 SetSrcWidth;                                  // 0x0000072C - 0x0000072F
    NvV32 SetSrcHeight;                                 // 0x00000730 - 0x00000733
    NvV32 SetSrcDepth;                                  // 0x00000734 - 0x00000737
    NvV32 SetSrcLayer;                                  // 0x00000738 - 0x0000073B
    NvV32 SetSrcOrigin;                                 // 0x0000073C - 0x0000073F
    NvV32 Reserved12[0x1];
    NvV32 SrcOriginX;                                   // 0x00000744 - 0x00000747
    NvV32 SrcOriginY;                                   // 0x00000748 - 0x0000074B
    NvV32 DstOriginX;                                   // 0x0000074C - 0x0000074F
    NvV32 DstOriginY;                                   // 0x00000750 - 0x00000753
    NvV32 Reserved13[0x270];
    NvV32 PmTriggerEnd;                                 // 0x00001114 - 0x00001117
    NvV32 Reserved14[0x3BA];
} blackwell_dma_copy_aControlPio;
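// Illustrative sketch (not part of the class definition): because the struct
// above mirrors the method space byte-for-byte, method offsets can be
// sanity-checked against the field layout. Assumes a C11 toolchain; offsetof
// comes from <stddef.h>.
#include <stddef.h>
_Static_assert(offsetof(blackwell_dma_copy_aControlPio, LaunchDma) == 0x300,
               "LaunchDma must sit at method offset 0x300 (NVC9B5_LAUNCH_DMA)");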

#define NVC9B5_NOP                                                      (0x00000100)
#define NVC9B5_NOP_PARAMETER                                            31:0
#define NVC9B5_PM_TRIGGER                                               (0x00000140)
#define NVC9B5_PM_TRIGGER_V                                             31:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE                                 (0x0000021C)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE                            0:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE            (0x00000000)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT        (0x00000001)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER               (0x00000220)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER         24:0
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER               (0x00000224)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER         31:0
#define NVC9B5_SET_SEMAPHORE_A                                          (0x00000240)
#define NVC9B5_SET_SEMAPHORE_A_UPPER                                    24:0
#define NVC9B5_SET_SEMAPHORE_B                                          (0x00000244)
#define NVC9B5_SET_SEMAPHORE_B_LOWER                                    31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD                                    (0x00000248)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD                            31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER                              (0x0000024C)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD                      31:0
#define NVC9B5_SET_RENDER_ENABLE_A                                      (0x00000254)
#define NVC9B5_SET_RENDER_ENABLE_A_UPPER                                24:0
#define NVC9B5_SET_RENDER_ENABLE_B                                      (0x00000258)
#define NVC9B5_SET_RENDER_ENABLE_B_LOWER                                31:0
#define NVC9B5_SET_RENDER_ENABLE_C                                      (0x0000025C)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE                                 2:0
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_FALSE                           (0x00000000)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_TRUE                            (0x00000001)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL                     (0x00000002)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL                 (0x00000003)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL             (0x00000004)
#define NVC9B5_SET_SRC_PHYS_MODE                                        (0x00000260)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET                                 1:0
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB                        (0x00000000)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM                 (0x00000001)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM              (0x00000002)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM                         (0x00000003)
#define NVC9B5_SET_SRC_PHYS_MODE_BASIC_KIND                             5:2
#define NVC9B5_SET_SRC_PHYS_MODE_PEER_ID                                8:6
#define NVC9B5_SET_SRC_PHYS_MODE_FLA                                    9:9
#define NVC9B5_SET_DST_PHYS_MODE                                        (0x00000264)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET                                 1:0
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB                        (0x00000000)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM                 (0x00000001)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM              (0x00000002)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_PEERMEM                         (0x00000003)
#define NVC9B5_SET_DST_PHYS_MODE_BASIC_KIND                             5:2
#define NVC9B5_SET_DST_PHYS_MODE_PEER_ID                                8:6
#define NVC9B5_SET_DST_PHYS_MODE_FLA                                    9:9
#define NVC9B5_LAUNCH_DMA                                               (0x00000300)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                            1:0
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                       (0x00000000)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED                  (0x00000001)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED              (0x00000002)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE                                  2:2
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE                            (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                             (0x00000001)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE                                    25:25
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_SYS                                (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_GL                                 (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE                                4:3
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE                           (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE     (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE    (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE                                6:5
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE                           (0x00000000)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING                       (0x00000001)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING                   (0x00000002)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT                             7:7
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR                 (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH                       (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT                             8:8
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR                 (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH                       (0x00000001)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE                             9:9
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE                       (0x00000000)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE                        (0x00000001)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE                                  10:10
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_FALSE                            (0x00000000)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_TRUE                             (0x00000001)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE                            11:11
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_FALSE                      (0x00000000)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_TRUE                       (0x00000001)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE                                      12:12
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL                              (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL                             (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_TYPE                                      13:13
#define NVC9B5_LAUNCH_DMA_DST_TYPE_VIRTUAL                              (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_TYPE_PHYSICAL                             (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION                           17:14
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN                      (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX                      (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR                      (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND                      (0x00000003)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR                       (0x00000004)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD                      (0x00000005)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC                       (0x00000006)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC                       (0x00000007)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA                  (0x00000008)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB                  (0x00000009)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD                      (0x0000000A)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN                      (0x0000000B)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX                      (0x0000000C)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC                  (0x0000000D)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD                  (0x0000000E)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE                  (0x0000000F)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN                      18:18
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED               (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED             (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE                    19:19
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE              (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE               (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE                                     21:20
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT                           (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_DEFAULT                             (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_SECURE                              (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT                     (0x00000002)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_RESERVED                            (0x00000003)
#define NVC9B5_LAUNCH_DMA_VPRMODE                                       22:22
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_NONE                              (0x00000000)
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                           (0x00000001)
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE                           23:23
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE                     (0x00000000)
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE                      (0x00000001)
#define NVC9B5_LAUNCH_DMA_RESERVED_START_OF_COPY                        24:24
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC                                   26:26
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_FALSE                             (0x00000000)
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_TRUE                              (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE                        27:27
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD               (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD               (0x00000001)
#define NVC9B5_LAUNCH_DMA_RESERVED_ERR_CODE                             31:28
#define NVC9B5_OFFSET_IN_UPPER                                          (0x00000400)
#define NVC9B5_OFFSET_IN_UPPER_UPPER                                    24:0
#define NVC9B5_OFFSET_IN_LOWER                                          (0x00000404)
#define NVC9B5_OFFSET_IN_LOWER_VALUE                                    31:0
#define NVC9B5_OFFSET_OUT_UPPER                                         (0x00000408)
#define NVC9B5_OFFSET_OUT_UPPER_UPPER                                   24:0
#define NVC9B5_OFFSET_OUT_LOWER                                         (0x0000040C)
#define NVC9B5_OFFSET_OUT_LOWER_VALUE                                   31:0
#define NVC9B5_PITCH_IN                                                 (0x00000410)
#define NVC9B5_PITCH_IN_VALUE                                           31:0
#define NVC9B5_PITCH_OUT                                                (0x00000414)
#define NVC9B5_PITCH_OUT_VALUE                                          31:0
#define NVC9B5_LINE_LENGTH_IN                                           (0x00000418)
#define NVC9B5_LINE_LENGTH_IN_VALUE                                     31:0
#define NVC9B5_LINE_COUNT                                               (0x0000041C)
#define NVC9B5_LINE_COUNT_VALUE                                         31:0
#define NVC9B5_SET_SECURE_COPY_MODE                                     (0x00000500)
#define NVC9B5_SET_SECURE_COPY_MODE_MODE                                0:0
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT                        (0x00000000)
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_DECRYPT                        (0x00000001)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET                 20:19
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB        (0x00000000)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM         (0x00000003)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID                23:21
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA                    24:24
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET                 26:25
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB        (0x00000000)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM         (0x00000003)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID                29:27
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA                    30:30
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY                31:31
#define NVC9B5_SET_DECRYPT_IV0                                          (0x00000504)
#define NVC9B5_SET_DECRYPT_IV0_VALUE                                    31:0
#define NVC9B5_SET_DECRYPT_IV1                                          (0x00000508)
#define NVC9B5_SET_DECRYPT_IV1_VALUE                                    31:0
#define NVC9B5_SET_DECRYPT_IV2                                          (0x0000050C)
#define NVC9B5_SET_DECRYPT_IV2_VALUE                                    31:0
#define NVC9B5_RESERVED_SET_AESCOUNTER                                  (0x00000510)
#define NVC9B5_RESERVED_SET_AESCOUNTER_VALUE                            31:0
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER                  (0x00000514)
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER            24:0
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER                  (0x00000518)
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER            31:0
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER                          (0x00000530)
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER                    24:0
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER                          (0x00000534)
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER                    31:0
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER                                (0x00000538)
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER                          24:0
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER                                (0x0000053C)
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER                          31:0
#define NVC9B5_SET_COMPRESSION_PARAMETERS                               (0x00000580)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION                     0:0
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_DECOMPRESS          (0x00000000)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_COMPRESS            (0x00000001)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO                          3:1
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY                   (0x00000000)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_DATA_ONLY            (0x00000001)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK                (0x00000002)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK_CHECKSUM       (0x00000003)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_DEFLATE                  (0x00000004)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY_WITH_LONG_FETCH   (0x00000005)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM                     29:28
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_NONE                (0x00000000)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_ADLER32             (0x00000001)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_CRC32               (0x00000002)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_SNAPPY_CRC          (0x00000003)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH                                (0x00000584)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_V                              31:0
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER                     (0x00000588)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER_UPPER               24:0
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER                     (0x0000058C)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER_LOWER               31:0
#define NVC9B5_SET_DECOMPRESS_CHECKSUM                                  (0x00000590)
#define NVC9B5_SET_DECOMPRESS_CHECKSUM_V                                31:0
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS                              (0x000006FC)
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE                  0:0
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE            (0x00000000)
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE             (0x00000001)
#define NVC9B5_SET_REMAP_CONST_A                                        (0x00000700)
#define NVC9B5_SET_REMAP_CONST_A_V                                      31:0
#define NVC9B5_SET_REMAP_CONST_B                                        (0x00000704)
#define NVC9B5_SET_REMAP_CONST_B_V                                      31:0
#define NVC9B5_SET_REMAP_COMPONENTS                                     (0x00000708)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X                               2:0
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_X                         (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y                         (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z                         (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_W                         (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_A                       (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_B                       (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE                      (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y                               6:4
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X                         (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y                         (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z                         (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W                         (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A                       (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B                       (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE                      (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z                               10:8
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X                         (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y                         (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z                         (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W                         (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A                       (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B                       (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE                      (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W                               14:12
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_X                         (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y                         (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z                         (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_W                         (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_A                       (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_B                       (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE                      (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE                      17:16
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE                  (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO                  (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE                (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR                 (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS                  21:20
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE              (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO              (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE            (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR             (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS                  25:24
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE              (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO              (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE            (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR             (0x00000003)
#define NVC9B5_SET_DST_BLOCK_SIZE                                       (0x0000070C)
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH                                 3:0
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                         (0x00000000)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT                                7:4
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB                        (0x00000000)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS                       (0x00000001)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS                      (0x00000002)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                     (0x00000003)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                   (0x00000004)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                 (0x00000005)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH                                 11:8
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB                         (0x00000000)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                        (0x00000001)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS                       (0x00000002)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS                      (0x00000003)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                    (0x00000004)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                  (0x00000005)
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT                            15:12
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8         (0x00000001)
#define NVC9B5_SET_DST_WIDTH                                            (0x00000710)
#define NVC9B5_SET_DST_WIDTH_V                                          31:0
#define NVC9B5_SET_DST_HEIGHT                                           (0x00000714)
#define NVC9B5_SET_DST_HEIGHT_V                                         31:0
#define NVC9B5_SET_DST_DEPTH                                            (0x00000718)
#define NVC9B5_SET_DST_DEPTH_V                                          31:0
#define NVC9B5_SET_DST_LAYER                                            (0x0000071C)
#define NVC9B5_SET_DST_LAYER_V                                          31:0
#define NVC9B5_SET_DST_ORIGIN                                           (0x00000720)
#define NVC9B5_SET_DST_ORIGIN_X                                         15:0
#define NVC9B5_SET_DST_ORIGIN_Y                                         31:16
#define NVC9B5_SET_SRC_BLOCK_SIZE                                       (0x00000728)
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH                                 3:0
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB                         (0x00000000)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT                                7:4
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB                        (0x00000000)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS                       (0x00000001)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS                      (0x00000002)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                     (0x00000003)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                   (0x00000004)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                 (0x00000005)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH                                 11:8
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB                         (0x00000000)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS                        (0x00000001)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS                       (0x00000002)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS                      (0x00000003)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                    (0x00000004)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                  (0x00000005)
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT                            15:12
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8         (0x00000001)
#define NVC9B5_SET_SRC_WIDTH                                            (0x0000072C)
#define NVC9B5_SET_SRC_WIDTH_V                                          31:0
#define NVC9B5_SET_SRC_HEIGHT                                           (0x00000730)
#define NVC9B5_SET_SRC_HEIGHT_V                                         31:0
#define NVC9B5_SET_SRC_DEPTH                                            (0x00000734)
#define NVC9B5_SET_SRC_DEPTH_V                                          31:0
#define NVC9B5_SET_SRC_LAYER                                            (0x00000738)
#define NVC9B5_SET_SRC_LAYER_V                                          31:0
#define NVC9B5_SET_SRC_ORIGIN                                           (0x0000073C)
#define NVC9B5_SET_SRC_ORIGIN_X                                         15:0
#define NVC9B5_SET_SRC_ORIGIN_Y                                         31:16
#define NVC9B5_SRC_ORIGIN_X                                             (0x00000744)
#define NVC9B5_SRC_ORIGIN_X_VALUE                                       31:0
#define NVC9B5_SRC_ORIGIN_Y                                             (0x00000748)
#define NVC9B5_SRC_ORIGIN_Y_VALUE                                       31:0
#define NVC9B5_DST_ORIGIN_X                                             (0x0000074C)
#define NVC9B5_DST_ORIGIN_X_VALUE                                       31:0
#define NVC9B5_DST_ORIGIN_Y                                             (0x00000750)
#define NVC9B5_DST_ORIGIN_Y_VALUE                                       31:0
#define NVC9B5_PM_TRIGGER_END                                           (0x00001114)
#define NVC9B5_PM_TRIGGER_END_V                                         31:0
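// Illustrative sketch (not part of the class definition): a minimal linear
// (pitch) copy built from the methods above. push_method() is a hypothetical
// host-channel helper; real drivers go through their own pushbuffer
// abstractions.
extern void push_method(void *pb, NvU32 method, NvU32 data);

static inline void c9b5_linear_copy_sketch(void *pb, NvU64 src, NvU64 dst, NvU32 bytes)
{
    push_method(pb, NVC9B5_OFFSET_IN_UPPER,  (NvU32)(src >> 32));
    push_method(pb, NVC9B5_OFFSET_IN_LOWER,  (NvU32)src);
    push_method(pb, NVC9B5_OFFSET_OUT_UPPER, (NvU32)(dst >> 32));
    push_method(pb, NVC9B5_OFFSET_OUT_LOWER, (NvU32)dst);
    push_method(pb, NVC9B5_LINE_LENGTH_IN,   bytes);
    push_method(pb, NVC9B5_LINE_COUNT,       1);
    // One-line pitch copy: non-pipelined, both layouts PITCH, flush on done,
    // no semaphore release.
    push_method(pb, NVC9B5_LAUNCH_DMA,
                (NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED << 0) |
                (NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE << 2) |
                (NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH << 7) |
                (NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH << 8));
}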

#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc9b5_h

#endif // __gb100_clc9b5_h__

@@ -151,6 +151,7 @@ typedef volatile struct _clcba2_tag0 {
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS              (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE                 (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED                        (0x0000001d)
#define NVCBA2_ERROR_SIZE_ZERO                                      (0x0000001e)

#ifdef __cplusplus
}; /* extern "C" */

@@ -43,4 +43,7 @@

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100               (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000               (0x00000001)

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B               (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B               (0x0000000B)
#endif /* _ctrl2080mc_h_ */

@@ -21,6 +21,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_fd_type.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_processors.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
@@ -95,7 +96,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_heuristics.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_thrashing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
@@ -127,3 +127,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_file.c

@@ -50,7 +50,6 @@ NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)

NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
@@ -63,6 +62,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += page_pgmap

NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
@@ -81,3 +81,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data

NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___iowrite64_lo_hi
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_make_device_exclusive

@@ -33,10 +33,12 @@
#include "uvm_va_block.h"
#include "uvm_tools.h"
#include "uvm_common.h"
#include "uvm_fd_type.h"
#include "uvm_linux_ioctl.h"
#include "uvm_hmm.h"
#include "uvm_mem.h"
#include "uvm_kvmalloc.h"
#include "uvm_test_file.h"

#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"

@@ -49,55 +51,9 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
    return (filp != NULL) && (filp->f_op == &uvm_fops);
}

uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp)
{
    unsigned long uptr;
    uvm_fd_type_t type;
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);

    switch (type) {
        case UVM_FD_UNINITIALIZED:
        case UVM_FD_INITIALIZING:
            UVM_ASSERT(!ptr);
            break;

        case UVM_FD_VA_SPACE:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_MM:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
            break;

        default:
            UVM_ASSERT(0);
    }

    if (ptr_val)
        *ptr_val = ptr;

    return type;
}

void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    if (uvm_fd_type(filp, &ptr) == type)
        return ptr;
    else
        return NULL;
    return uvm_file_is_nvidia_uvm(filp) && uvm_fd_type(filp, NULL) == UVM_FD_VA_SPACE;
}
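// Illustrative sketch (not part of the driver): uvm_fd_type() above decodes a
// tagged pointer, with the fd type kept in the low UVM_FD_TYPE_BITS of
// filp->private_data and the object pointer in the remaining bits. Packing is
// the inverse operation; the helper name below is hypothetical.
static inline unsigned long uvm_fd_pack_sketch(void *ptr, uvm_fd_type_t type)
{
    // Relies on the BUILD_BUG_ON alignment checks above: the low bits of any
    // valid object pointer are zero, so the tag can be ORed in losslessly.
    return (unsigned long)ptr | ((unsigned long)type & UVM_FD_TYPE_MASK);
}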

static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
@@ -105,7 +61,6 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
    uvm_va_space_t *va_space;
    uvm_va_space_mm_t *va_space_mm;
    struct file *uvm_file;
    uvm_fd_type_t old_fd_type;
    struct mm_struct *mm;
    NV_STATUS status;

@@ -127,14 +82,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
        goto err;
    }

    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type != UVM_FD_UNINITIALIZED) {
        status = NV_ERR_IN_USE;
    status = uvm_fd_type_init(filp);
    if (status != NV_OK)
        goto err;
    }

    va_space_mm = &va_space->va_space_mm;
    uvm_spin_lock(&va_space_mm->lock);
@@ -173,13 +123,13 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
            break;
    }
    uvm_spin_unlock(&va_space_mm->lock);
    atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
    uvm_fd_type_set(filp, UVM_FD_MM, uvm_file);

    return NV_OK;

err_release_unlock:
    uvm_spin_unlock(&va_space_mm->lock);
    atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
    uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);

err:
    if (uvm_file)
@@ -249,44 +199,10 @@ static void uvm_release_deferred(void *data)
    uvm_up_read(&g_uvm_global.pm.lock);
}

static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static void uvm_release_va_space(struct file *filp, uvm_va_space_t *va_space)
{
    uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    struct mm_struct *mm = va_space_mm->mm;

    if (uvm_va_space_mm_enabled(va_space)) {
        uvm_va_space_mm_unregister(va_space);

        if (uvm_va_space_mm_enabled(va_space))
            uvm_mmput(mm);

        va_space_mm->mm = NULL;
        fput(uvm_file);
    }
}

static int uvm_release(struct inode *inode, struct file *filp)
{
    void *ptr;
    uvm_va_space_t *va_space;
    uvm_fd_type_t fd_type;
    int ret;

    fd_type = uvm_fd_type(filp, &ptr);
    UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
    if (fd_type == UVM_FD_UNINITIALIZED) {
        uvm_kvfree(filp->f_mapping);
        return 0;
    }
    else if (fd_type == UVM_FD_MM) {
        uvm_kvfree(filp->f_mapping);
        uvm_mm_release(filp, (struct file *)ptr);
        return 0;
    }

    UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
    va_space = (uvm_va_space_t *)ptr;
    filp->private_data = NULL;
    filp->f_mapping = NULL;

@@ -310,6 +226,52 @@ static int uvm_release(struct inode *inode, struct file *filp)
        ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
        UVM_ASSERT(ret != 0);
    }
}

static void uvm_release_mm(struct file *filp, struct file *uvm_file)
{
    uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    struct mm_struct *mm = va_space_mm->mm;

    uvm_kvfree(filp->f_mapping);

    if (uvm_va_space_mm_enabled(va_space)) {
        uvm_va_space_mm_unregister(va_space);

        if (uvm_va_space_mm_enabled(va_space))
            uvm_mmput(mm);

        va_space_mm->mm = NULL;
        fput(uvm_file);
    }
}

static int uvm_release(struct inode *inode, struct file *filp)
{
    void *ptr;
    uvm_fd_type_t fd_type = uvm_fd_type(filp, &ptr);

    switch (fd_type) {
        case UVM_FD_UNINITIALIZED:
            uvm_kvfree(filp->f_mapping);
            break;

        case UVM_FD_VA_SPACE:
            uvm_release_va_space(filp, (uvm_va_space_t *)ptr);
            break;

        case UVM_FD_MM:
            uvm_release_mm(filp, (struct file *)ptr);
            break;

        case UVM_FD_TEST:
            uvm_test_file_release(filp, (uvm_test_file_t *)ptr);
            break;

        default:
            UVM_ASSERT_MSG(0, "Unexpected fd type: %d\n", fd_type);
    }

    return 0;
}
@@ -829,6 +791,7 @@ static struct vm_operations_struct uvm_vm_ops_device_p2p =

static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
    void *fd_type_ptr;
    uvm_va_space_t *va_space;
    NV_STATUS status = uvm_global_get_status();
    int ret = 0;
@@ -837,9 +800,17 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    if (status != NV_OK)
        return -nv_status_to_errno(status);

    va_space = uvm_fd_va_space(filp);
    if (!va_space)
        return -EBADFD;
    switch (uvm_fd_type(filp, &fd_type_ptr)) {
        case UVM_FD_VA_SPACE:
            va_space = (uvm_va_space_t *)fd_type_ptr;
            break;

        case UVM_FD_TEST:
            return uvm_test_file_mmap((uvm_test_file_t *)fd_type_ptr, vma);

        default:
            return -EBADFD;
    }

    // When the VA space is associated with an mm, all vmas under the VA space
    // must come from that mm.
@@ -999,33 +970,40 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
    // attempt to be made. This is safe because other threads will have only had
    // a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
    // case.
    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type == UVM_FD_UNINITIALIZED) {
        status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
        if (status != NV_OK) {
            atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
            return status;
        }
    old_fd_type = uvm_fd_type_init_cas(filp);
    switch (old_fd_type) {
        case UVM_FD_UNINITIALIZED:
            status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
            if (status != NV_OK) {
                uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
                return status;
            }

        atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
    }
    else if (old_fd_type == UVM_FD_VA_SPACE) {
        va_space = uvm_va_space_get(filp);
            uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);
            break;

        if (params->flags != va_space->initialization_flags)
        case UVM_FD_VA_SPACE:
            va_space = uvm_va_space_get(filp);
            if (params->flags != va_space->initialization_flags)
                status = NV_ERR_INVALID_ARGUMENT;
            else
                status = NV_OK;

            break;

        case UVM_FD_MM:
        case UVM_FD_TEST:
            status = NV_ERR_INVALID_ARGUMENT;
        else
            status = NV_OK;
    }
    else if (old_fd_type == UVM_FD_MM) {
        status = NV_ERR_INVALID_ARGUMENT;
    }
    else {
        UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
        status = NV_ERR_BUSY_RETRY;
            break;

        case UVM_FD_INITIALIZING:
            status = NV_ERR_BUSY_RETRY;
            break;

        default:
            UVM_ASSERT(0);
            status = NV_ERR_INVALID_STATE; // Quiet compiler warnings
            break;
    }

    return status;

@@ -1430,9 +1430,9 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
// UvmMigrate
//
// Migrates the backing of a given virtual address range to the specified
// destination processor. If any page in the VA range is unpopulated, it is
// populated at the destination processor. The migrated pages in the VA range
// are also mapped on the destination processor.
// destination processor's nearest memory. If any page in the VA range is
// unpopulated, it is populated at the destination processor. The migrated pages
// in the VA range are also mapped on the destination processor.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
@@ -2207,9 +2207,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input virtual range corresponds to
// system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
// reports that pageable memory access is supported, the behavior described
// below does not take effect, and read duplication will not be enabled for
// the input range.
// reports that pageable memory access is supported, or if a memoryless
// processor is present, the behavior described below does not take effect, and
// read duplication will not be enabled for the input range.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU.
@@ -2330,7 +2330,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmSetPreferredLocation
//
// Sets the preferred location for the given virtual address range to be the
// specified processor's memory.
// specified processor's nearest memory.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address

@@ -78,8 +78,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

@@ -82,8 +82,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

@@ -189,7 +189,7 @@ static bool uvm_api_range_invalid(NvU64 base, NvU64 length)
}

// Some APIs can only enforce 4K alignment as it's the smallest GPU page size
// even when the smallest host page is larger (e.g. 64K on ppc64le).
// even when the smallest host page is larger.
static bool uvm_api_range_invalid_4k(NvU64 base, NvU64 length)
{
    return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_4K);

@@ -42,26 +42,11 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
        uvm_va_space_mm_enabled_system();
}

void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
    uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_init_va_space(va_space);
}

NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ibm_add_gpu() needs to be called even if ATS is disabled since it
        // sets parent_gpu->npu. Not setting parent_gpu->npu will result in
        // incorrect NVLink addresses. See dma_addr_to_gpu_addr().

        return uvm_ats_ibm_add_gpu(parent_gpu);
    }
    else if (UVM_ATS_SVA_SUPPORTED()) {
        if (g_uvm_global.ats.enabled)
            return uvm_ats_sva_add_gpu(parent_gpu);
    if (g_uvm_global.ats.enabled) {
        UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
        return uvm_ats_sva_add_gpu(parent_gpu);
    }

    return NV_OK;
@@ -69,38 +54,25 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)

void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ibm_remove_gpu() needs to be called even if ATS is disabled since
        // uvm_ibm_add_gpu() is called even in that case and
        // uvm_ibm_remove_gpu() needs to undo the work done by
        // uvm_ats_add_gpu() (gpu retained_count etc.).

        uvm_ats_ibm_remove_gpu(parent_gpu);
    }
    else if (UVM_ATS_SVA_SUPPORTED()) {
        if (g_uvm_global.ats.enabled)
            uvm_ats_sva_remove_gpu(parent_gpu);
    if (g_uvm_global.ats.enabled) {
        UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
        uvm_ats_sva_remove_gpu(parent_gpu);
    }
}

NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
    NV_STATUS status = NV_OK;

    UVM_ASSERT(gpu_va_space);

    if (!gpu_va_space->ats.enabled)
        return status;
        return NV_OK;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    uvm_assert_lockable_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_SPACE);

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_bind_gpu(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        status = uvm_ats_sva_bind_gpu(gpu_va_space);

    return status;
    return uvm_ats_sva_bind_gpu(gpu_va_space);
}

void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -110,10 +82,9 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return;

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unbind_gpu(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        uvm_ats_sva_unbind_gpu(gpu_va_space);
    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    uvm_ats_sva_unbind_gpu(gpu_va_space);
}

NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -127,6 +98,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return status;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    va_space = gpu_va_space->va_space;
    UVM_ASSERT(va_space);

@@ -138,10 +111,7 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (uvm_processor_mask_test(&va_space->ats.registered_gpu_va_spaces, gpu_id))
        return NV_ERR_INVALID_DEVICE;

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
    status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);

    if (status == NV_OK)
        uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@@ -159,25 +129,14 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    va_space = gpu_va_space->va_space;
    gpu_id = gpu_va_space->gpu->id;

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
    uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);

    uvm_va_space_down_write(va_space);
    uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
    uvm_va_space_up_write(va_space);
}

void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    // We can only reach here from the mmu_notifier callbacks and these callbacks
    // wouldn't have been registered if ATS wasn't enabled.
    UVM_ASSERT(g_uvm_global.ats.enabled);

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_invalidate(va_space, start, end);
}

@@ -26,12 +26,11 @@

#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
#define UVM_ATS_SUPPORTED() UVM_ATS_SVA_SUPPORTED()

typedef struct
{
@@ -43,12 +42,7 @@ typedef struct
    // being called in ats_compute_residency_mask().
    uvm_rw_semaphore_t lock;

    union
    {
        uvm_ibm_va_space_t ibm;

        uvm_sva_va_space_t sva;
    };
    uvm_sva_va_space_t sva;
} uvm_ats_va_space_t;

typedef struct
@@ -61,12 +55,7 @@ typedef struct

    NvU32 pasid;

    union
    {
        uvm_ibm_gpu_va_space_t ibm;

        uvm_sva_gpu_va_space_t sva;
    };
    uvm_sva_gpu_va_space_t sva;
} uvm_ats_gpu_va_space_t;

// Initializes driver-wide ATS state
@@ -74,11 +63,6 @@ typedef struct
// LOCKING: None
void uvm_ats_init(const UvmPlatformInfo *platform_info);

// Initializes ATS specific GPU state
//
// LOCKING: None
void uvm_ats_init_va_space(uvm_va_space_t *va_space);

// Enables ATS feature on the GPU.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
@@ -115,8 +99,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
//
// LOCKING: The VA space lock must be held in write mode.
//          mm has to be retained prior to calling this function.
//          current->mm->mmap_lock must be held in write mode iff
//          UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Disables ATS access for the gpu_va_space. Prior to calling this function,
@@ -124,19 +106,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and will acquire the VA space
//          lock, so neither lock must be held.
// LOCKING: This function will acquire the VA space lock, so it must not be
//          held.
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
//          interrupts disabled.
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
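// Illustrative usage sketch (not from the driver): a typical call shape from
// an mmu_notifier invalidate callback. Note that 'end' is inclusive per the
// comment above, hence the -1 when converting from an exclusive kernel range:
//
//     uvm_ats_invalidate(va_space, range->start, range->end - 1);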

#endif // __UVM_ATS_H__

@@ -58,37 +58,6 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
    bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
    bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
    uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
        (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
        UVM_POPULATE_PERMISSIONS_INHERIT;


    // Request uvm_migrate_pageable() to touch the corresponding page after
    // population.
    // Under virtualization ATS provides two translations:
    // 1) guest virtual -> guest physical
    // 2) guest physical -> host physical
    //
    // The overall ATS translation will fault if either of those translations is
    // invalid. The pin_user_pages() call within uvm_migrate_pageable() call
    // below handles translation #1, but not #2. We don't know if we're running
    // as a guest, but in case we are we can force that translation to be valid
    // by touching the guest physical address from the CPU. If the translation
    // is not valid then the access will cause a hypervisor fault. Note that
    // dma_map_page() can't establish mappings used by GPU ATS SVA translations.
    // GPU accesses to host physical addresses obtained as a result of the
    // address translation request uses the CPU address space instead of the
    // IOMMU address space since the translated host physical address isn't
    // necessarily an IOMMU address. The only way to establish guest physical to
    // host physical mapping in the CPU address space is to touch the page from
    // the CPU.
    //
    // We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
    // VM_WRITE, meaning that the mappings are all granted write access on any
    // fault and that the kernel will never revoke them.
    // drivers/vfio/pci/vfio_pci_nvlink2.c enforces this. Thus we can assume
    // that a read fault is always sufficient to also enable write access on the
    // guest translation.
|
||||
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
@@ -98,8 +67,8 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = is_fault_service_type,
|
||||
.populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
|
||||
.populate_flags = UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK,
|
||||
.skip_mapped = is_fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = is_fault_service_type,
|
||||
.populate_on_migrate_vma_failures = is_fault_service_type,
|
||||
@@ -115,6 +84,13 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
.fail_on_unresolved_sto_errors = !is_fault_service_type || is_prefetch_faults,
|
||||
};
|
||||
|
||||
if (is_fault_service_type) {
|
||||
uvm_migrate_args.populate_permissions = (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY);
|
||||
|
||||
// If we're faulting, let the GPU access special vmas
|
||||
uvm_migrate_args.populate_flags |= UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL;
|
||||
}
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
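
The CPU-side "touch" described in the comment above can be pictured in isolation. A minimal stand-alone sketch of the idea (illustrative only: the driver performs this through uvm_migrate_pageable()'s touch flag, and the 4K page size here is an assumption):

```c
#include <stddef.h>
#include <stdint.h>

#define TOUCH_PAGE_SIZE 4096u /* assumption: 4K pages */

/* Read one byte per page of [base, base + length). The volatile read is
 * enough to make the hypervisor fault in and validate the guest-physical
 * to host-physical stage of the translation; the value is discarded. */
static void touch_pages(const volatile uint8_t *base, size_t length)
{
    size_t offset;

    for (offset = 0; offset < length; offset += TOUCH_PAGE_SIZE)
        (void)base[offset];
}
```
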
@@ -533,8 +509,20 @@ static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
                                     access_type,
                                     UVM_ATS_SERVICE_TYPE_FAULTS,
                                     ats_context);
        if (status != NV_OK)
        if (status != NV_OK) {
            // This condition can occur if we unexpectedly fault on a vma that
            // doesn't support faulting (or at least doesn't support
            // pin_user_pages). This may be an incorrect mapping setup from the
            // vma's owning driver, a hardware bug, or just that the owning driver
            // didn't expect a device fault. Either way, we don't want to consider
            // this a global error so don't propagate it, but also don't indicate
            // that the faults were serviced. That way the caller knows to cancel
            // them precisely.
            if (status == NV_ERR_INVALID_ADDRESS)
                return NV_OK;

            return status;
        }

        uvm_page_mask_region_fill(faults_serviced_mask, region);

@@ -689,12 +677,14 @@ bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_v
        if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
            return true;

        prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
        prev = uvm_va_range_gmmu_mappable_prev(next);
    }
    else {
        // No VA range exists after address, so check the last VA range in the
        // tree.
        prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
        while (prev && !uvm_va_range_is_gmmu_mappable(prev))
            prev = uvm_va_range_gmmu_mappable_prev(prev);
    }

    return prev && (prev->node.end >= gmmu_region_base);
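
The containment test above reduces to a closed-interval overlap check between a VA range and the granularity-aligned GMMU region around the faulting address. A stand-alone sketch (the 512MB granularity is an assumption, not taken from this diff):

```c
#include <stdbool.h>
#include <stdint.h>

#define GMMU_ATS_GRANULARITY (512ull << 20) /* assumption: 512MB regions */

/* Both intervals are inclusive, mirroring the prev/next checks above. */
static bool range_touches_gmmu_region(uint64_t range_start, uint64_t range_end, uint64_t addr)
{
    uint64_t region_base = addr & ~(GMMU_ATS_GRANULARITY - 1);
    uint64_t region_last = region_base + GMMU_ATS_GRANULARITY - 1;

    return range_start <= region_last && range_end >= region_base;
}
```
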
@@ -767,6 +757,20 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
                                  &ats_context->access_counters.accessed_mask,
                                  &ats_context->prefetch_state.residency_mask);

    // Pretend that pages that are already resident at the destination GPU were
    // migrated now. This makes sure that the access counter is cleared even if
    // the accessed pages were already resident on the target.
    // TODO: Bug 5296998: [uvm][ats] Not clearing stale access counter
    //       notifications can lead to missed migrations
    // The same problem of stale notifications exists for migration to
    // locations other than local vidmem. However, stale notifications for data
    // migrated to another remote location are identical to those triggered
    // by accessing memory that cannot or should not be migrated.
    if (uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
        uvm_page_mask_copy(&ats_context->access_counters.migrated_mask,
                           &ats_context->prefetch_state.residency_mask);
    }

    for_each_va_block_subregion_in_mask(subregion, &ats_context->access_counters.accessed_mask, region) {
        NV_STATUS status;
        NvU64 start = base + (subregion.first * PAGE_SIZE);
@@ -779,7 +783,7 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,

        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);

        // clear access counters if pages were migrated or migration needs to
        // Clear access counters if pages were migrated or migration needs to
        // be retried
        if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
            uvm_page_mask_region_fill(migrated_mask, subregion);
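
The mask arithmetic above can be pictured with word-sized bitmaps. A toy stand-alone version (uvm_page_mask_t is a full kernel bitmap; a single uint64_t is a simplification):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t accessed_mask = 0xF0F0;  /* pages with access-counter notifications */
    uint64_t residency_mask = 0x00F0; /* pages already resident at the target    */

    /* Treat already-resident pages as freshly migrated so their stale
     * notifications are cleared together with the real migrations. */
    uint64_t migrated_mask = accessed_mask & residency_mask;

    printf("clear counters for mask 0x%llx\n", (unsigned long long)migrated_mask);
    return 0;
}
```
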
@@ -1,715 +0,0 @@
/*******************************************************************************
    Copyright (c) 2018-2019 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_lock.h"
#include "uvm_kvmalloc.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include "uvm_ats_ibm.h"
#include "uvm_common.h"

#include <linux/pci.h>

#if UVM_IBM_NPU_SUPPORTED()

#include <linux/of.h>
#include <linux/sizes.h>
#include <asm/pci-bridge.h>
#include <asm/io.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>

#define NPU_ATSD_REG_MAP_SIZE 32

// There are three 8-byte registers in each ATSD mapping:
#define NPU_ATSD_REG_LAUNCH 0
#define NPU_ATSD_REG_AVA    1
#define NPU_ATSD_REG_STAT   2

// Fields within the NPU_ATSD_REG_LAUNCH register:

// "PRS" (process-scoped) bit. 1 means to limit invalidates to the specified
// PASID.
#define NPU_ATSD_REG_LAUNCH_PASID_ENABLE 13

// "PID" field. This specifies the PASID target of the invalidate.
#define NPU_ATSD_REG_LAUNCH_PASID_VAL 38

// "IS" bit. 0 means the specified virtual address range will be invalidated. 1
// means all entries will be invalidated.
#define NPU_ATSD_REG_LAUNCH_INVAL_ALL 12

// "AP" field. This encodes the size of a range-based invalidate.
#define NPU_ATSD_REG_LAUNCH_INVAL_SIZE 17

// "No flush" bit. 0 will trigger a flush (membar) from the GPU following the
// invalidate, 1 will not.
#define NPU_ATSD_REG_LAUNCH_FLUSH_DISABLE 39

// Helper to iterate over the active NPUs in the given VA space (all NPUs with
// GPUs that have GPU VA spaces registered in this VA space).
#define for_each_npu_index_in_va_space(npu_index, va_space) \
    for (({uvm_assert_rwlock_locked(&(va_space)->ats.ibm.rwlock); \
          (npu_index) = find_first_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS);}); \
         (npu_index) < NV_MAX_NPUS; \
         (npu_index) = find_next_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS, (npu_index) + 1))
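
The macro above expands to the usual find_first_bit()/find_next_bit() walk over the active-NPU bitmap. A self-contained equivalent over one 32-bit word (the kernel helpers handle arbitrary-length bitmaps; this is a simplification):

```c
#include <stdint.h>
#include <stdio.h>

#define MAX_NPUS 32

/* Index of the next set bit at or after 'start', or MAX_NPUS if none. */
static unsigned next_active(uint32_t mask, unsigned start)
{
    unsigned i;

    for (i = start; i < MAX_NPUS; i++) {
        if (mask & (1u << i))
            return i;
    }
    return MAX_NPUS;
}

int main(void)
{
    uint32_t npu_active_mask = 0x12; /* NPUs 1 and 4 active */
    unsigned i;

    for (i = next_active(npu_active_mask, 0); i < MAX_NPUS; i = next_active(npu_active_mask, i + 1))
        printf("issue ATSD to NPU %u\n", i);
    return 0;
}
```
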

// An invalidate requires operating on one set of registers in each NPU. This
// struct tracks which register set (id) is in use per NPU for a given
// operation.
typedef struct
{
    NvU8 ids[NV_MAX_NPUS];
} uvm_atsd_regs_t;

// Get the index of the input npu pointer within UVM's global npus array
static size_t uvm_ibm_npu_index(uvm_ibm_npu_t *npu)
{
    size_t npu_index = npu - &g_uvm_global.npus[0];
    UVM_ASSERT(npu_index < ARRAY_SIZE(g_uvm_global.npus));
    return npu_index;
}

// Find an existing NPU matching pci_domain, or return an empty NPU slot if none
// is found. Returns NULL if no slots are available.
static uvm_ibm_npu_t *uvm_ibm_npu_find(int pci_domain)
{
    size_t i;
    uvm_ibm_npu_t *npu, *first_free = NULL;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for (i = 0; i < ARRAY_SIZE(g_uvm_global.npus); i++) {
        npu = &g_uvm_global.npus[i];
        if (npu->num_retained_gpus == 0) {
            if (!first_free)
                first_free = npu;
        }
        else if (npu->pci_domain == pci_domain) {
            return npu;
        }
    }

    return first_free;
}

static void uvm_ibm_npu_destroy(uvm_ibm_npu_t *npu)
{
    size_t i;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(npu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));

    for (i = 0; i < npu->atsd_regs.count; i++) {
        UVM_ASSERT(npu->atsd_regs.io_addrs[i]);
        iounmap(npu->atsd_regs.io_addrs[i]);
    }

    memset(npu, 0, sizeof(*npu));
}

static NV_STATUS uvm_ibm_npu_init(uvm_ibm_npu_t *npu, struct pci_dev *npu_dev)
{
    struct pci_controller *hose;
    size_t i, reg_count, reg_size = sizeof(npu->atsd_regs.io_addrs[0]);
    int ret;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(npu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));

    npu->pci_domain = pci_domain_nr(npu_dev->bus);

    if (!UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
        return NV_OK;

    hose = pci_bus_to_host(npu_dev->bus);

    ret = of_property_count_elems_of_size(hose->dn, "ibm,mmio-atsd", reg_size);
    if (ret < 0) {
        UVM_ERR_PRINT("Failed to query NPU %d ATSD register count: %d\n", npu->pci_domain, ret);
        return errno_to_nv_status(ret);
    }

    // For ATS to be enabled globally, we must have NPU ATSD registers
    reg_count = ret;
    if (reg_count == 0 || reg_count > UVM_MAX_ATSD_REGS) {
        UVM_ERR_PRINT("NPU %d has invalid ATSD register count: %zu\n", npu->pci_domain, reg_count);
        return NV_ERR_INVALID_STATE;
    }

    // Map the ATSD registers
    for (i = 0; i < reg_count; i++) {
        u64 phys_addr;
        __be64 __iomem *io_addr;
        ret = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", i, &phys_addr);
        UVM_ASSERT(ret == 0);

        io_addr = ioremap(phys_addr, NPU_ATSD_REG_MAP_SIZE);
        if (!io_addr) {
            uvm_ibm_npu_destroy(npu);
            return NV_ERR_NO_MEMORY;
        }

        npu->atsd_regs.io_addrs[npu->atsd_regs.count++] = io_addr;
    }

    return NV_OK;
}

NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    struct pci_dev *npu_dev = pnv_pci_get_npu_dev(parent_gpu->pci_dev, 0);
    uvm_ibm_npu_t *npu;
    NV_STATUS status;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (!npu_dev)
        return NV_OK;

    npu = uvm_ibm_npu_find(pci_domain_nr(npu_dev->bus));
    if (!npu) {
        // If this happens then we can't support the system configuration until
        // NV_MAX_NPUS is updated. Return the same error as when the number of
        // GPUs exceeds UVM_MAX_GPUS.
        UVM_ERR_PRINT("No more NPU slots available, update NV_MAX_NPUS\n");
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    if (npu->num_retained_gpus == 0) {
        status = uvm_ibm_npu_init(npu, npu_dev);
        if (status != NV_OK)
            return status;
    }

    // This npu field could be read concurrently by a thread in the ATSD
    // invalidate path. We don't need to provide ordering with those threads
    // because those invalidates won't apply to the GPU being added until a GPU
    // VA space on this GPU is registered.
    npu->atsd_regs.num_membars = max(npu->atsd_regs.num_membars, parent_gpu->num_hshub_tlb_invalidate_membars);

    parent_gpu->npu = npu;
    ++npu->num_retained_gpus;
    return NV_OK;
}

void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    uvm_ibm_npu_t *npu = parent_gpu->npu;
    uvm_parent_gpu_t *other_parent_gpu;
    NvU32 num_membars_new = 0;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (!npu)
        return;

    UVM_ASSERT(npu->num_retained_gpus > 0);
    if (--npu->num_retained_gpus == 0) {
        uvm_ibm_npu_destroy(npu);
    }
    else {
        // Re-calculate the membar count
        for_each_parent_gpu(other_parent_gpu) {
            // The current GPU being removed should've already been removed from
            // the global list.
            UVM_ASSERT(other_parent_gpu != parent_gpu);
            if (other_parent_gpu->npu == npu)
                num_membars_new = max(num_membars_new, other_parent_gpu->num_hshub_tlb_invalidate_membars);
        }

        UVM_ASSERT(num_membars_new > 0);
        npu->atsd_regs.num_membars = num_membars_new;
    }
}

#if UVM_ATS_IBM_SUPPORTED()

void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
    uvm_ibm_va_space_t *ibm_va_space;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    uvm_rwlock_irqsave_init(&ibm_va_space->rwlock, UVM_LOCK_ORDER_LEAF);
}

#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static void npu_release_dummy(struct npu_context *npu_context, void *va_mm)
{
    // See the comment on the call to pnv_npu2_init_context()
}

static NV_STATUS uvm_ats_ibm_register_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
    struct npu_context *npu_context;

    // pnv_npu2_init_context() registers current->mm with
    // mmu_notifier_register(). We need that to match the mm we passed to our
    // own mmu_notifier_register() for this VA space.
    if (current->mm != va_space->va_space_mm.mm)
        return NV_ERR_NOT_SUPPORTED;

    uvm_assert_mmap_lock_locked_write(current->mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    // pnv_npu2_init_context() doesn't handle being called multiple times for
    // the same GPU under the same mm, which could happen if multiple VA spaces
    // are created in this process. To handle that we pass the VA space pointer
    // as the callback parameter: the callback values are shared by all devices
    // under this mm, so pnv_npu2_init_context() enforces that the values match
    // the ones already registered to the mm.
    //
    // Otherwise we don't use the callback, since we have our own callback
    // registered under the va_space_mm that will be called at the same point
    // (mmu_notifier release).
    npu_context = pnv_npu2_init_context(gpu_va_space->gpu->parent->pci_dev,
                                        (MSR_DR | MSR_PR | MSR_HV),
                                        npu_release_dummy,
                                        va_space);
    if (IS_ERR(npu_context)) {
        int err = PTR_ERR(npu_context);

        // We'll get -EINVAL if the callback value (va_space) differs from the
        // one already registered to the npu_context associated with this mm.
        // That can only happen when multiple VA spaces attempt registration
        // within the same process, which is disallowed and should return
        // NV_ERR_NOT_SUPPORTED.
        if (err == -EINVAL)
            return NV_ERR_NOT_SUPPORTED;
        return errno_to_nv_status(err);
    }

    ibm_gpu_va_space->npu_context = npu_context;

    return NV_OK;
}

static void uvm_ats_ibm_unregister_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_gpu_va_space_state_t state;
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_va_space_t *ibm_va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;

    if (!ibm_gpu_va_space->npu_context)
        return;

    // va_space is guaranteed to not be NULL if ibm_gpu_va_space->npu_context is
    // not NULL.
    UVM_ASSERT(va_space);

    state = uvm_gpu_va_space_state(gpu_va_space);
    UVM_ASSERT(state == UVM_GPU_VA_SPACE_STATE_INIT || state == UVM_GPU_VA_SPACE_STATE_DEAD);

    ibm_va_space = &va_space->ats.ibm;

    // pnv_npu2_destroy_context() may in turn call mmu_notifier_unregister().
    // If uvm_va_space_mm_shutdown() is concurrently executing in another
    // thread, mmu_notifier_unregister() will wait for
    // uvm_va_space_mm_shutdown() to finish. uvm_va_space_mm_shutdown() takes
    // mmap_lock and the VA space lock, so we can't be holding those locks on
    // this path.
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);

    pnv_npu2_destroy_context(ibm_gpu_va_space->npu_context, gpu_va_space->gpu->parent->pci_dev);
    ibm_gpu_va_space->npu_context = NULL;
}

#else

static void uvm_ats_ibm_register_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
    uvm_ibm_va_space_t *ibm_va_space;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    uvm_assert_rwsem_locked_write(&va_space->lock);

    uvm_write_lock_irqsave(&ibm_va_space->rwlock);

    // If this is the first GPU VA space to use this NPU in the VA space, mark
    // the NPU as active so invalidates are issued to it.
    if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
        // If this is the first active NPU in the entire VA space, we have to
        // tell the kernel to send TLB invalidations to the IOMMU. See kernel
        // commit 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f for background.
        //
        // This is safe to do without holding mm_users high or mmap_lock.
        if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
            mm_context_add_copro(va_space->va_space_mm.mm);

        UVM_ASSERT(!test_bit(npu_index, ibm_va_space->npu_active_mask));
        __set_bit(npu_index, ibm_va_space->npu_active_mask);
    }
    else {
        UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
    }

    ++ibm_va_space->npu_ref_counts[npu_index];

    // As soon as this lock is dropped, invalidates on this VA space's mm may
    // begin issuing ATSDs to this NPU.
    uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);

    ibm_gpu_va_space->did_ibm_driver_init = true;
}

static void uvm_ats_ibm_unregister_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
    bool do_remove = false;
    uvm_ibm_va_space_t *ibm_va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;

    if (!ibm_gpu_va_space->did_ibm_driver_init)
        return;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    // Note that we aren't holding the VA space lock here, so another thread
    // could be in uvm_ats_ibm_register_gpu_va_space() for this same GPU right
    // now. The write lock and ref counts below will handle that case.

    // Once we return from this function with a bit cleared in the
    // npu_active_mask, we have to guarantee that this VA space no longer
    // accesses that NPU's ATSD registers. This is needed in case GPU unregister
    // needs to unmap those registers. We use the reader/writer lock to
    // guarantee this, which means that invalidations must not access the ATSD
    // registers outside of the lock.
    //
    // Future work: if we could synchronize_srcu() on the mmu_notifier SRCU we
    // might do that here instead to flush out all invalidates. That would allow
    // us to avoid taking a read lock in the invalidate path, though we'd have
    // to be careful when clearing the mask bit relative to the synchronize, and
    // we'd have to be careful in cases where this thread doesn't hold a
    // reference to mm_users.
    uvm_write_lock_irqsave(&ibm_va_space->rwlock);

    UVM_ASSERT(ibm_va_space->npu_ref_counts[npu_index] > 0);
    UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));

    --ibm_va_space->npu_ref_counts[npu_index];
    if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
        __clear_bit(npu_index, ibm_va_space->npu_active_mask);
        if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
            do_remove = true;
    }

    uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);

    if (do_remove) {
        // mm_context_remove_copro() must be called outside of the spinlock
        // because it may issue invalidates across CPUs in this mm. The
        // coprocessor count is atomically refcounted by that function, so it's
        // safe to call here even if another thread jumps in with a register and
        // calls mm_context_add_copro() between this thread's unlock and this
        // call.
        UVM_ASSERT(va_space->va_space_mm.mm);
        mm_context_remove_copro(va_space->va_space_mm.mm);
    }
}

#endif // UVM_ATS_IBM_SUPPORTED_IN_KERNEL()

static mm_context_id_t va_space_pasid(uvm_va_space_t *va_space)
{
    struct mm_struct *mm = va_space->va_space_mm.mm;
    UVM_ASSERT(mm);
    return mm->context.id;
}

NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(gpu_va_space->ats.enabled);
    UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
    UVM_ASSERT(va_space->va_space_mm.mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    status = uvm_ats_ibm_register_gpu_va_space_kernel(gpu_va_space);
#else
    uvm_ats_ibm_register_gpu_va_space_driver(gpu_va_space);
#endif

    gpu_va_space->ats.pasid = (NvU32) va_space_pasid(gpu_va_space->va_space);

    return status;
}

void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    uvm_ats_ibm_unregister_gpu_va_space_kernel(gpu_va_space);
#else
    uvm_ats_ibm_unregister_gpu_va_space_driver(gpu_va_space);
#endif

    gpu_va_space->ats.pasid = -1U;
}

#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()

// Find any available ATSD register set in this NPU and return that index. This
// will busy wait until a register set is free.
static NvU8 atsd_reg_acquire(uvm_ibm_npu_t *npu)
{
    uvm_spin_loop_t spin;
    size_t i;
    bool first = true;

    while (1) {
        // Using for_each_clear_bit is racy, since the bits could change at any
        // point. That's ok since we'll either just retry or use a real atomic
        // to lock the bit. Checking for clear bits first avoids spamming
        // atomics in the contended case.
        for_each_clear_bit(i, npu->atsd_regs.locks, npu->atsd_regs.count) {
            if (!test_and_set_bit_lock(i, npu->atsd_regs.locks))
                return (NvU8)i;
        }

        // Back off and try again, avoiding the overhead of initializing the
        // tracking timers unless we need them.
        if (first) {
            uvm_spin_loop_init(&spin);
            first = false;
        }
        else {
            UVM_SPIN_LOOP(&spin);
        }
    }
}

static void atsd_reg_release(uvm_ibm_npu_t *npu, NvU8 reg)
{
    UVM_ASSERT(reg < npu->atsd_regs.count);
    UVM_ASSERT(test_bit(reg, npu->atsd_regs.locks));
    clear_bit_unlock(reg, npu->atsd_regs.locks);
}
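
The acquire/release pair above is a classic bit lock: scan for a clear bit cheaply, claim it with a single atomic read-modify-write, and publish the release with release ordering. A stand-alone C11 rendition of the same idea (not the kernel's test_and_set_bit_lock(), and without the spin-loop backoff):

```c
#include <stdatomic.h>
#include <stdint.h>

#define NUM_REGS 16

static atomic_uint reg_locks; /* bit n set => register set n is in use */

static unsigned reg_acquire(void)
{
    for (;;) {
        unsigned snapshot = atomic_load_explicit(&reg_locks, memory_order_relaxed);
        unsigned i;

        for (i = 0; i < NUM_REGS; i++) {
            if (snapshot & (1u << i))
                continue; /* looks busy: skip without an atomic */

            /* Looked free: try to claim it with one atomic RMW. */
            if (!(atomic_fetch_or_explicit(&reg_locks, 1u << i, memory_order_acquire) & (1u << i)))
                return i;
        }
        /* All sets busy: retry (a real implementation would back off). */
    }
}

static void reg_release(unsigned i)
{
    atomic_fetch_and_explicit(&reg_locks, ~(1u << i), memory_order_release);
}
```
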

static __be64 atsd_reg_read(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset)
{
    __be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
    UVM_ASSERT(reg < npu->atsd_regs.count);
    return __raw_readq(io_addr);
}

static void atsd_reg_write(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset, NvU64 val)
{
    __be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
    UVM_ASSERT(reg < npu->atsd_regs.count);
    __raw_writeq_be(val, io_addr);
}

// Acquire a set of registers in each NPU which is active in va_space
static void atsd_regs_acquire(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        regs->ids[i] = atsd_reg_acquire(&g_uvm_global.npus[i]);
}

static void atsd_regs_release(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        atsd_reg_release(&g_uvm_global.npus[i], regs->ids[i]);
}

// Write the provided value to each NPU active in va_space at the provided
// register offset.
static void atsd_regs_write(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, size_t offset, NvU64 val)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        atsd_reg_write(&g_uvm_global.npus[i], regs->ids[i], offset, val);
}

// Wait for all prior operations issued to active NPUs in va_space on the given
// registers to finish.
static void atsd_regs_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    uvm_spin_loop_t spin;
    size_t i;
    for_each_npu_index_in_va_space(i, va_space) {
        UVM_SPIN_WHILE(atsd_reg_read(&g_uvm_global.npus[i], regs->ids[i], NPU_ATSD_REG_STAT), &spin)
            ;
    }
}

// Encode an invalidate targeting the given pasid and the given size for the
// NPU_ATSD_REG_LAUNCH register. The target address is encoded separately.
//
// psize must be one of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A
// psize of MMU_PAGE_COUNT means to invalidate the entire address space.
static NvU64 atsd_get_launch_val(mm_context_id_t pasid, int psize)
{
    NvU64 val = 0;

    val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE);
    val |= pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL);

    if (psize == MMU_PAGE_COUNT) {
        val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_INVAL_ALL);
    }
    else {
        // The NPU registers do not support arbitrary sizes
        UVM_ASSERT(psize == MMU_PAGE_64K || psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
        val |= (NvU64)mmu_get_ap(psize) << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_INVAL_SIZE);
    }

    return val;
}
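
The launch-value encoding uses IBM's big-endian bit numbering, where bit 0 is the most-significant bit, so PPC_BIT(13) is 1 << (63 - 13). A stand-alone illustration with local re-definitions (assumed to match the kernel's semantics; the field positions are the #defines near the top of this file):

```c
#include <stdint.h>
#include <stdio.h>

#define PPC_BITLSHIFT(be) (63 - (be))
#define PPC_BIT(bit)      (1ull << PPC_BITLSHIFT(bit))

int main(void)
{
    uint64_t pasid = 7; /* example PASID */
    uint64_t val = 0;

    val |= PPC_BIT(13);                /* PRS: scope the invalidate to one PASID */
    val |= pasid << PPC_BITLSHIFT(38); /* PID: the target PASID                  */
    val |= PPC_BIT(12);                /* IS: invalidate the whole address space */

    printf("launch value = 0x%llx\n", (unsigned long long)val);
    return 0;
}
```
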

// Return the encoded size to use for an ATSD targeting the given range, in one
// of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A return value of
// MMU_PAGE_COUNT means the entire address space must be invalidated.
//
// start is an in/out parameter. On return start will be set to the aligned
// starting address to use for the ATSD. end is inclusive.
static int atsd_calc_size(NvU64 *start, NvU64 end)
{
    // ATSDs have high latency, so we prefer to over-invalidate rather than
    // issue multiple precise invalidates. Supported sizes are only 64K, 2M, and
    // 1G.

    *start = UVM_ALIGN_DOWN(*start, SZ_64K);
    end = UVM_ALIGN_DOWN(end, SZ_64K);
    if (*start == end)
        return MMU_PAGE_64K;

    *start = UVM_ALIGN_DOWN(*start, SZ_2M);
    end = UVM_ALIGN_DOWN(end, SZ_2M);
    if (*start == end)
        return MMU_PAGE_2M;

    *start = UVM_ALIGN_DOWN(*start, SZ_1G);
    end = UVM_ALIGN_DOWN(end, SZ_1G);
    if (*start == end)
        return MMU_PAGE_1G;

    return MMU_PAGE_COUNT;
}
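
A worked instance of the size selection: for start = 0x10000fff and end = 0x1002abcd the 64K-aligned endpoints still differ, but both fall into the same 2M frame, so one 2M ATSD rooted at the aligned start over-invalidates that frame instead of issuing several 64K operations. A stand-alone check of the arithmetic:

```c
#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
    uint64_t start = 0x10000fffull, end = 0x1002abcdull;

    /* 64K: 0x10000000 vs 0x10020000 -> differ, 64K is too small.
     * 2M:  0x10000000 vs 0x10000000 -> match, so MMU_PAGE_2M is chosen
     * and the ATSD start is rounded down to 0x10000000. */
    printf("64K: 0x%llx vs 0x%llx\n",
           (unsigned long long)ALIGN_DOWN(start, 64 << 10),
           (unsigned long long)ALIGN_DOWN(end, 64 << 10));
    printf("2M:  0x%llx vs 0x%llx\n",
           (unsigned long long)ALIGN_DOWN(start, 2 << 20),
           (unsigned long long)ALIGN_DOWN(end, 2 << 20));
    return 0;
}
```
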

// Issue an ATSD to all NPUs and wait for completion
static void atsd_launch_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 val)
{
    atsd_regs_write(va_space, regs, NPU_ATSD_REG_LAUNCH, val);
    atsd_regs_wait(va_space, regs);
}

// Issue and wait for the required membars following an invalidate
static void atsd_issue_membars(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    NvU32 num_membars = 0;

    // These membars are issued using ATSDs which target a reserved PASID of 0.
    // That PASID is valid on the GPU in order for the membar to be valid, but
    // 0 will never be used by the kernel for an actual address space so the
    // ATSD won't actually invalidate any entries.
    NvU64 val = atsd_get_launch_val(0, MMU_PAGE_COUNT);

    for_each_npu_index_in_va_space(i, va_space) {
        uvm_ibm_npu_t *npu = &g_uvm_global.npus[i];
        num_membars = max(num_membars, npu->atsd_regs.num_membars);
    }

    for (i = 0; i < num_membars; i++)
        atsd_launch_wait(va_space, regs, val);
}

static void uvm_ats_ibm_invalidate_all(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), MMU_PAGE_COUNT);
    atsd_launch_wait(va_space, regs, val);
    atsd_issue_membars(va_space, regs);
}

static void uvm_ats_ibm_invalidate_range(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 start, int psize)
{
    NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), psize);

    // Barriers are expensive, so write all address registers first then do a
    // single barrier for all of them.
    atsd_regs_write(va_space, regs, NPU_ATSD_REG_AVA, start);
    eieio();
    atsd_launch_wait(va_space, regs, val);
    atsd_issue_membars(va_space, regs);
}

#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()

void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
    unsigned long irq_flags;
    uvm_atsd_regs_t regs;
    NvU64 atsd_start = start;
    int psize = atsd_calc_size(&atsd_start, end);
    uvm_ibm_va_space_t *ibm_va_space = &va_space->ats.ibm;

    BUILD_BUG_ON(order_base_2(UVM_MAX_ATSD_REGS) > 8*sizeof(regs.ids[0]));

    // We must hold this lock in at least read mode when accessing NPU
    // registers. See the comment in uvm_ats_ibm_unregister_gpu_va_space_driver.
    uvm_read_lock_irqsave(&ibm_va_space->rwlock, irq_flags);

    if (!bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS)) {
        atsd_regs_acquire(va_space, &regs);

        if (psize == MMU_PAGE_COUNT)
            uvm_ats_ibm_invalidate_all(va_space, &regs);
        else
            uvm_ats_ibm_invalidate_range(va_space, &regs, atsd_start, psize);

        atsd_regs_release(va_space, &regs);
    }

    uvm_read_unlock_irqrestore(&ibm_va_space->rwlock, irq_flags);
#else
    UVM_ASSERT_MSG(0, "This function should not be called on this kernel version\n");
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
}

#endif // UVM_ATS_IBM_SUPPORTED
#endif // UVM_IBM_NPU_SUPPORTED

@@ -1,266 +0,0 @@
/*******************************************************************************
    Copyright (c) 2018-2019 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_ATS_IBM_H__
#define __UVM_ATS_IBM_H__

#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"

#if defined(NVCPU_PPC64LE) && defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
#include <asm/mmu.h>
#if defined(NV_MAX_NPUS)
#define UVM_IBM_NPU_SUPPORTED() 1
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif

#if defined(NV_ASM_OPAL_API_H_PRESENT)
// For OPAL_NPU_INIT_CONTEXT
#include <asm/opal-api.h>
#endif

// Timeline of kernel changes:
//
// 0) Before 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c
//    - No NPU-ATS code existed, nor did the OPAL_NPU_INIT_CONTEXT firmware
//      call.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Not defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Not defined
//    - ATS support type                                None
//
// 1) NPU ATS code added: 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c, v4.12
//    (2017-04-03)
//    - This commit added initial support for NPU ATS, including the necessary
//      OPAL firmware calls. This support was developmental and required
//      several bug fixes before it could be used in production.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                None
//
// 2) NPU ATS code fixed: a1409adac748f0db655e096521bbe6904aadeb98, v4.17
//    (2018-04-11)
//    - This commit changed the function signature for pnv_npu2_init_context's
//      callback parameter. Since all required bug fixes went in prior to this
//      change, we can use the callback signature as a flag to indicate
//      whether the PPC arch layer in the kernel supports ATS in production.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                Kernel
//
// 3) NPU ATS code removed: 7eb3cf761927b2687164e182efa675e6c09cfe44, v5.3
//    (2019-06-25)
//    - This commit removed NPU-ATS support from the PPC arch layer, so the
//      driver needs to handle things instead. pnv_npu2_init_context is no
//      longer present, so we use OPAL_NPU_INIT_CONTEXT to differentiate
//      between this state and scenario #0.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Not defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                Driver
//
#if defined(NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID)
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 1
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#elif !defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT) && defined(OPAL_NPU_INIT_CONTEXT) && UVM_CAN_USE_MMU_NOTIFIERS()
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 1
#else
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#endif

#define UVM_ATS_IBM_SUPPORTED() (UVM_ATS_IBM_SUPPORTED_IN_KERNEL() || UVM_ATS_IBM_SUPPORTED_IN_DRIVER())

// Maximum number of parallel ATSD register sets per NPU
#define UVM_MAX_ATSD_REGS 16

typedef struct
{
#if UVM_IBM_NPU_SUPPORTED()
    // These are the active NPUs in this VA space, that is, all NPUs with
    // GPUs that have GPU VA spaces registered in this VA space.
    //
    // If a bit is clear in npu_active_mask then the corresponding entry of
    // npu_ref_counts is 0. If a bit is set then the corresponding entry of
    // npu_ref_counts is greater than 0.
    NvU32 npu_ref_counts[NV_MAX_NPUS];
    DECLARE_BITMAP(npu_active_mask, NV_MAX_NPUS);
#endif

    // Lock protecting npu_ref_counts and npu_active_mask. Invalidations
    // take this lock for read. GPU VA space register and unregister take
    // this lock for write. Since all invalidations take the lock for read
    // for the duration of the invalidate, taking the lock for write also
    // flushes all invalidates.
    //
    // This is a spinlock because the invalidation code paths may be called
    // with interrupts disabled, so those paths can't take the VA space
    // lock. We could use a normal exclusive spinlock instead, but a reader/
    // writer lock is preferred to allow concurrent invalidates in the same
    // VA space.
    uvm_rwlock_irqsave_t rwlock;
} uvm_ibm_va_space_t;

typedef struct
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    struct npu_context *npu_context;
#endif

    // Used on the teardown path to know what to clean up. npu_context acts
    // as the equivalent flag for kernel-provided support.
    bool did_ibm_driver_init;
} uvm_ibm_gpu_va_space_t;

struct uvm_ibm_npu_struct
{
    // Number of retained GPUs under this NPU. The other fields in this struct
    // are only valid if this is non-zero.
    unsigned int num_retained_gpus;

    // PCI domain containing this NPU. This acts as a unique system-wide ID for
    // this UVM NPU.
    int pci_domain;

    // The ATS-related fields are only valid when ATS support is enabled and
    // UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1.
    struct
    {
        // Mapped addresses of the ATSD trigger registers. There may be more
        // than one set of identical registers per NPU to enable concurrent
        // invalidates.
        //
        // These will not be accessed unless there is a GPU VA space registered
        // on a GPU under this NPU. They are protected by bit locks in the locks
        // field.
        __be64 __iomem *io_addrs[UVM_MAX_ATSD_REGS];

        // Actual number of registers in the io_addrs array
        size_t count;

        // Bitmask for allocation and locking of the registers. Bit index n
        // corresponds to io_addrs[n]. A set bit means that index is in use
        // (locked).
        DECLARE_BITMAP(locks, UVM_MAX_ATSD_REGS);

        // Max value of any uvm_parent_gpu_t::num_hshub_tlb_invalidate_membars
        // for all retained GPUs under this NPU.
        NvU32 num_membars;
    } atsd_regs;
};

#if UVM_IBM_NPU_SUPPORTED()
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu);
#else
static NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    return NV_OK;
}

static void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{

}
#endif // UVM_IBM_NPU_SUPPORTED

#if UVM_ATS_IBM_SUPPORTED()
// Initializes IBM specific GPU state.
//
// LOCKING: None
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space);

// Enables ATS access for the gpu_va_space on the mm_struct associated with
// the VA space (va_space_mm).
//
// If UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1, NV_ERR_NOT_SUPPORTED is
// returned if current->mm does not match va_space_mm.mm or if a GPU VA
// space within another VA space has already called this function on the
// same mm.
//
// If UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1 there are no such restrictions.
//
// LOCKING: The VA space lock must be held in write mode.
//          current->mm->mmap_lock must be held in write mode iff
//          UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Disables ATS access for the gpu_va_space. Prior to calling this function,
// the caller must guarantee that the GPU will no longer make any ATS
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and the VA space lock, so
//          neither must be held.
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
//          interrupts disabled.
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#else
static void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{

}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
    return NV_OK;
}

static void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{

}

static void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{

}
#endif // UVM_ATS_IBM_SUPPORTED

static NV_STATUS uvm_ats_ibm_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
    return NV_OK;
}

static void uvm_ats_ibm_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{

}

#endif // __UVM_ATS_IBM_H__

@@ -139,7 +139,11 @@ static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)

static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT___iowrite64_lo_hi
    __iowrite64_lo_hi(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#else
    iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#endif
}

// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
@@ -304,12 +308,13 @@ void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr,

NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_enable_feature
    int ret;

    ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
    if (ret)
        return errno_to_nv_status(ret);

#endif
    if (UVM_ATS_SMMU_WAR_REQUIRED())
        return uvm_ats_smmu_war_init(parent_gpu);
    else
@@ -321,7 +326,9 @@ void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
    if (UVM_ATS_SMMU_WAR_REQUIRED())
        uvm_ats_smmu_war_deinit(parent_gpu);

#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_disable_feature
    iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
#endif
}

NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)

@@ -81,8 +81,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -102,4 +100,15 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->no_ats_range_required = true;

    parent_gpu->conf_computing.per_channel_key_rotation = true;

    // TODO: Bug 5023085: this should be queried from RM instead of determined
    // by UVM.
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
        parent_gpu->rm_info.gpuImplementation ==
            NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B)
        parent_gpu->is_integrated_gpu = true;
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
        parent_gpu->rm_info.gpuImplementation ==
            NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
        parent_gpu->is_integrated_gpu = true;
}

@@ -254,3 +254,31 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
                       HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
    }
}

uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
                                                      uvm_access_counter_buffer_entry_t **buffer_entries,
                                                      NvU32 num_entries)
{
    if (parent_gpu->rm_info.accessCntrBufferCount > 1) {
        NvU32 i;

        for (i = 0; i < num_entries; i++) {
            const uvm_access_counter_buffer_entry_t *entry = buffer_entries[i];

            // The LSb identifies the die ID.
            if ((entry->tag & 0x1) == 1)
                return UVM_ACCESS_COUNTER_CLEAR_OP_ALL;
        }
    }

    return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}

uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu,
                                                      uvm_access_counter_buffer_entry_t **buffer_entries,
                                                      NvU32 num_entries)
{
    return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}

@@ -39,6 +39,7 @@
#include "hwref/blackwell/gb100/dev_fault.h"

static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
static uvm_mmu_mode_hal_t blackwell_integrated_mmu_mode_hal;

static NvU32 page_table_depth_blackwell(NvU64 page_size)
{
@@ -59,35 +60,71 @@ static NvU64 page_sizes_blackwell(void)
    return UVM_PAGE_SIZE_256G | UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static NvU64 page_sizes_blackwell_integrated(void)
{
    return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static uvm_mmu_mode_hal_t *__uvm_hal_mmu_mode_blackwell(uvm_mmu_mode_hal_t *mmu_mode_hal,
                                                        NvU64 big_page_size)
{
    uvm_mmu_mode_hal_t *hopper_mmu_mode_hal;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
    UVM_ASSERT(hopper_mmu_mode_hal);

    // The assumption made is that arch_hal->mmu_mode_hal() will be called
    // under the global lock the first time, so check it here.
    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    *mmu_mode_hal = *hopper_mmu_mode_hal;
    mmu_mode_hal->page_table_depth = page_table_depth_blackwell;

    return mmu_mode_hal;
}

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size)
{
    static bool initialized = false;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
        UVM_ASSERT(hopper_mmu_mode_hal);

        // The assumption made is that arch_hal->mmu_mode_hal() will be called
        // under the global lock the first time, so check it here.
        uvm_assert_mutex_locked(&g_uvm_global.global_lock);

        blackwell_mmu_mode_hal = *hopper_mmu_mode_hal;
        blackwell_mmu_mode_hal.page_table_depth = page_table_depth_blackwell;
        blackwell_mmu_mode_hal.page_sizes = page_sizes_blackwell;
        uvm_mmu_mode_hal_t *mmu_mode_hal;

        mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_mmu_mode_hal, big_page_size);
        mmu_mode_hal->page_sizes = page_sizes_blackwell;
        initialized = true;
    }

    return &blackwell_mmu_mode_hal;
}

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size)
{
    static bool initialized = false;

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *mmu_mode_hal;

        mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_integrated_mmu_mode_hal, big_page_size);
        mmu_mode_hal->page_sizes = page_sizes_blackwell_integrated;
        initialized = true;
    }

    return &blackwell_integrated_mmu_mode_hal;
}
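
Both accessors above follow the same copy-the-parent-table-once pattern, relying on the global lock for the first call. The shape of that idiom, re-expressed with C11's call_once as a stand-alone illustration (threads.h availability and the depth value are assumptions, not the driver's code):

```c
#include <stdio.h>
#include <threads.h>

typedef struct { int page_table_depth; } mmu_mode_hal_t;

static mmu_mode_hal_t g_hal;
static once_flag g_hal_once = ONCE_FLAG_INIT;

static void hal_init(void)
{
    /* Copy the parent architecture's table, then override what differs. */
    g_hal.page_table_depth = 6; /* assumption: placeholder depth */
}

static mmu_mode_hal_t *get_hal(void)
{
    call_once(&g_hal_once, hal_init);
    return &g_hal;
}

int main(void)
{
    printf("depth=%d\n", get_hal()->page_table_depth);
    return 0;
}
```
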

NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    switch (client_id) {

@@ -65,7 +65,10 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
    memset(host_ptr, 0, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
        uvm_rm_mem_type_t type;

        type = gpu->mem_info.size ? UVM_RM_MEM_TYPE_GPU : UVM_RM_MEM_TYPE_SYS;
        status = uvm_rm_mem_alloc(gpu, type, CE_TEST_MEM_SIZE, 0, &mem[i]);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

@@ -405,6 +408,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    uvm_rm_mem_t *sys_rm_mem = NULL;
    uvm_rm_mem_t *gpu_rm_mem = NULL;
    uvm_gpu_address_t gpu_addresses[4] = {0};
    size_t gpu_addresses_length = 0;
    size_t size = gpu->big_page.internal_size;
    static const size_t element_sizes[] = {1, 4, 8};
    const size_t iterations = 4;
@@ -435,7 +439,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)

    // Virtual address (in UVM's internal address space) backed by sysmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
    gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
    gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);

    if (g_uvm_global.conf_computing_enabled) {
        for (i = 0; i < iterations; ++i) {
@@ -472,21 +476,23 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    // Physical address in sysmem
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
    gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
    gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);

    // Physical address in vidmem
    mem_params.backing_gpu = gpu;
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
    gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
    if (gpu->mem_info.size > 0) {
        // Physical address in vidmem
        mem_params.backing_gpu = gpu;
        TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
        gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

        // Virtual address (in UVM's internal address space) backed by vidmem
        TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
        gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
        // Virtual address (in UVM's internal address space) backed by vidmem
        TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
        gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
    }

    for (i = 0; i < iterations; ++i) {
        for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
            for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
        for (j = 0; j < gpu_addresses_length; ++j) {
            for (k = 0; k < gpu_addresses_length; ++k) {
                for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
                    TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
                                                                    gpu_addresses[k],

@@ -2901,6 +2901,13 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,

static bool ce_is_usable(const UvmGpuCopyEngineCaps *cap)
{
    // When Confidential Computing is enabled, all Copy Engines must support
    // encryption / decryption, tracked by the 'secure' flag. This holds even for
    // non-CPU-GPU transactions because each channel has an associated semaphore,
    // and semaphore release must be observable by all processing units.
    if (g_uvm_global.conf_computing_enabled && !cap->secure)
        return false;

    return cap->supported && !cap->grce;
}

@@ -250,15 +250,6 @@ static inline NvBool uvm_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_star
    return a_end >= b_start && b_end >= a_start;
}

static int debug_mode(void)
{
#ifdef DEBUG
    return 1;
#else
    return 0;
#endif
}

static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
{
    if (ppCache)
@@ -336,22 +327,6 @@ typedef struct
    NvHandle user_object;
} uvm_rm_user_object_t;

typedef enum
{
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_MM,
    UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 2
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
|
||||

// Macro used to compare two values for types that support less than operator.
// It returns -1 if a < b, 1 if a > b and 0 if a == b
#define UVM_CMP_DEFAULT(a,b) \
@@ -374,37 +349,13 @@ typedef enum
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);

// Returns the type of data filp->private_data contains to and if ptr_val !=
// NULL returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type
// matches, otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
char *mapping;

UVM_ASSERT(page);

mapping = (char *) kmap(page);
(void)READ_ONCE(*mapping);
kunmap(page);
}
// Like uvm_file_is_nvidia_uvm(), but further requires that the input file
// represent a UVM VA space (has fd type UVM_FD_VA_SPACE).
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp);

// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);

static bool uvm_platform_uses_canonical_form_address(void)
{
if (NVCPU_IS_PPC64LE)
return false;

return true;
}

// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
static NvU32 uvm_cpu_num_va_bits(void)
{
@@ -420,7 +371,7 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *

// Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
// even when plugged into platforms using it.
if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
if (num_va_bits > 40) {
*first = 1ULL << (num_va_bits - 1);
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
}
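To make the arithmetic concrete, a worked example (not part of the source) for a 49-bit GPU VA space:

    // num_va_bits == 49:
    //   *first = 1ULL << 48                                = 0x0001000000000000
    //   *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - 49)) = 0xFFFF000000000000
    // so [0x0001000000000000, 0xFFFF000000000000) is the unaddressable
    // non-canonical hole for this VA width.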
@@ -708,7 +708,12 @@ void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
return gpu->channel_manager->conf_computing.key_rotation_enabled;
UVM_ASSERT(gpu);

// If the channel_manager is not set, we're in channel manager destroy
// path after the pointer was NULL-ed. Chances are that other key rotation
// infrastructure is not available either. Disallow the key rotation.
return gpu->channel_manager && gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
114
kernel-open/nvidia-uvm/uvm_fd_type.c
Normal file
@@ -0,0 +1,114 @@
/*******************************************************************************
Copyright (c) 2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_fd_type.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_va_space.h"
#include "uvm_test_file.h"

uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
{
unsigned long uptr;
uvm_fd_type_t type;
void *ptr;

UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);

switch (type) {
case UVM_FD_UNINITIALIZED:
case UVM_FD_INITIALIZING:
UVM_ASSERT(!ptr);
break;

case UVM_FD_VA_SPACE:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
break;

case UVM_FD_MM:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
break;

case UVM_FD_TEST:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(uvm_test_file_t) < (1UL << UVM_FD_TYPE_BITS));
break;

default:
UVM_ASSERT(0);
}

if (ptr_val)
*ptr_val = ptr;

return type;
}

void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
void *ptr;

UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

if (uvm_fd_type(filp, &ptr) == type)
return ptr;
else
return NULL;
}

uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp)
{
long old = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
return (uvm_fd_type_t)(old & UVM_FD_TYPE_MASK);
}

NV_STATUS uvm_fd_type_init(struct file *filp)
{
uvm_fd_type_t old = uvm_fd_type_init_cas(filp);

if (old != UVM_FD_UNINITIALIZED)
return NV_ERR_IN_USE;

return NV_OK;
}

void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr)
{
void *tmp_ptr;
UVM_ASSERT(uvm_fd_type(filp, &tmp_ptr) == UVM_FD_INITIALIZING);
UVM_ASSERT(!tmp_ptr);

if (type == UVM_FD_UNINITIALIZED)
UVM_ASSERT(!ptr);

UVM_ASSERT(((uintptr_t)ptr & type) == 0);

atomic_long_set_release((atomic_long_t *)&filp->private_data, (uintptr_t)ptr | type);
}
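The new file stores a small type tag in the low bits of filp->private_data, which pointer alignment leaves free; the BUILD_BUG_ON checks enforce that alignment. Below is a minimal, self-contained sketch of the same low-bit tagging technique with generic names (not the UVM types):

    #include <stdint.h>

    #define TAG_BITS 3
    #define TAG_MASK ((1UL << TAG_BITS) - 1)

    // Pack a small tag into the low bits of a pointer. Valid only when the
    // pointee's alignment is at least (1 << TAG_BITS), mirroring the
    // BUILD_BUG_ON(__alignof__(...) < (1UL << UVM_FD_TYPE_BITS)) checks above.
    static inline uintptr_t tag_pack(void *ptr, unsigned int tag)
    {
        return (uintptr_t)ptr | (tag & TAG_MASK);
    }

    static inline unsigned int tag_type(uintptr_t word)
    {
        return (unsigned int)(word & TAG_MASK);
    }

    static inline void *tag_ptr(uintptr_t word)
    {
        return (void *)(word & ~TAG_MASK);
    }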
69
kernel-open/nvidia-uvm/uvm_fd_type.h
Normal file
@@ -0,0 +1,69 @@
/*******************************************************************************
Copyright (c) 2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_FD_TYPE_H__
#define __UVM_FD_TYPE_H__

#include "nvstatus.h"

typedef enum
{
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING,
UVM_FD_VA_SPACE,
UVM_FD_MM,
UVM_FD_TEST,
UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 3
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

struct file;

// Returns the type of data filp->private_data contains and if ptr_val != NULL
// returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type matches,
// otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Does atomic CAS on filp->private_data, expecting UVM_FD_UNINITIALIZED and
// swapping in UVM_FD_INITIALIZING. Returns the old type regardless of CAS
// success.
uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp);

// Like uvm_fd_type_init_cas() but returns NV_OK on CAS success and
// NV_ERR_IN_USE on CAS failure.
NV_STATUS uvm_fd_type_init(struct file *filp);

// Assigns {type, ptr} to filp. filp's current type must be UVM_FD_INITIALIZING.
// If the new type is UVM_FD_UNINITIALIZED, ptr must be NULL.
void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr);

#endif // __UVM_FD_TYPE_H__
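A hedged sketch of the intended call sequence, inferred from the comments above; the registration function and its error handling are hypothetical, only the uvm_fd_* calls and their contracts come from this header:

    // Hypothetical initialization path: claim the fd, build the object,
    // then publish {type, pointer} with release semantics.
    static NV_STATUS example_register_va_space(struct file *filp, uvm_va_space_t *va_space)
    {
        NV_STATUS status = uvm_fd_type_init(filp); // CAS UNINITIALIZED -> INITIALIZING
        if (status != NV_OK)
            return status; // NV_ERR_IN_USE: another thread won the race

        // ... construct and initialize va_space here ...

        uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space); // publish to readers
        return NV_OK;
    }

Readers then use uvm_fd_get_type(filp, UVM_FD_VA_SPACE), which returns NULL until the publish completes.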
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2025 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -98,8 +98,7 @@ typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_b
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;

typedef struct uvm_reverse_map_struct uvm_reverse_map_t;

typedef struct uvm_ibm_npu_struct uvm_ibm_npu_t;
#endif //__UVM_FORWARD_DECL_H__

@@ -31,7 +31,6 @@
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"

// Global state of the uvm driver
struct uvm_global_struct
@@ -124,12 +123,6 @@ struct uvm_global_struct
bool enabled;
} ats;

#if UVM_IBM_NPU_SUPPORTED()
// On IBM systems this array tracks the active NPUs (the NPUs which are
// attached to retained GPUs).
uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

// List of all active VA spaces
struct
{
@@ -149,7 +149,7 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
}

// Return a PASID to use with the internal address space (AS), or -1 if not
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// not used in address translation requests, which only translate GPA->SPA.
// The buffer management thus remains the same: DMA mapped GPA addresses can
// be accessed by the GPU, while unmapped addresses can not and any access is
@@ -358,10 +358,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
addr_shift = gpu_addr_shift;

// Pascal+ GPUs are capable of accessing kernel pointers in various modes
// by applying the same upper-bit checks that x86, ARM, and Power
// processors do. x86 and ARM use canonical form addresses. For ARM, even
// by applying the same upper-bit checks that x86 or ARM CPU processors do.
// The x86 and ARM platforms use canonical form addresses. For ARM, even
// with Top-Byte Ignore enabled, the following logic validates addresses
// from the kernel VA range. PowerPC does not use canonical form address.
// from the kernel VA range.
//
// The following diagram illustrates the valid (V) VA regions that can be
// mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
// (C) regions are only accessible by the CPU. Similarly, (G) regions
@@ -389,8 +390,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
// 0 +----------------+ 0 +----------------+

// On canonical form address platforms and Pascal+ GPUs.
if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
// On Pascal+ GPUs.
if (gpu_addr_shift > 40) {
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
@@ -431,30 +432,28 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
NvU8 addr_shift;
NvU64 input_addr = addr;

if (uvm_platform_uses_canonical_form_address()) {
// When the CPU VA width is larger than GPU's, it means that:
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
// behavior of CPUs with smaller (than GPU) VA widths.
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
cpu_addr_shift = uvm_cpu_num_va_bits();

if (cpu_addr_shift > gpu_addr_shift)
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
else if (gpu_addr_shift == 57)
addr_shift = gpu_addr_shift;
else
addr_shift = cpu_addr_shift;

addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));

// This protection acts on when the address is not covered by the GPU's
// OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
// permissive (NO_CHECK) mode.
if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
return input_addr;
}

return addr;
}
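A worked example of the sign-extension expression (values chosen for illustration, not from the source): with addr_shift == 49, an address with bit 48 set is extended into the upper half of the canonical space.

    // addr_shift == 49, addr == 0x0001234500000000 (bit 48 set):
    //   addr << (64 - 49)       == 0x91A2800000000000 (negative as NvS64)
    //   arithmetic >> (64 - 49) == 0xFFFF234500000000 (canonical upper-half VA)
    // An address with bit 48 clear passes through unchanged.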
@@ -485,7 +484,7 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
continue;

UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
"nvlink p2p %u p2p %u\n",
"nvlink p2p %u p2p %u secure %u\n",
i,
ce_caps->cePceMask,
ce_caps->grce,
@@ -494,7 +493,8 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
ce_caps->sysmemWrite,
ce_caps->sysmem,
ce_caps->nvlinkP2p,
ce_caps->p2p);
ce_caps->p2p,
ce_caps->secure);
}

out:
@@ -595,9 +595,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
window_size / (1024 * 1024));
}

if (gpu->parent->npu)
UVM_SEQ_OR_DBG_PRINT(s, "npu_domain %d\n", gpu->parent->npu->pci_domain);

UVM_SEQ_OR_DBG_PRINT(s, "interrupts %llu\n", gpu->parent->isr.interrupt_count);

if (gpu->parent->isr.replayable_faults.handling) {
@@ -1041,7 +1038,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
{
struct proc_dir_entry *gpu_base_dir_entry;
char symlink_name[16]; // Hold a uvm_gpu_id_t value in decimal.
char uuid_buffer[max(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
char uuid_buffer[NV_MAX(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
char gpu_dir_name[sizeof(symlink_name) + sizeof(uuid_buffer) + 1];

if (!uvm_procfs_is_enabled())
@@ -1197,6 +1194,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters_clear_tracker);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1214,6 +1213,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
return NV_OK;

cleanup:
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_kvfree(parent_gpu);

return status;
@@ -1249,13 +1249,15 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
NvU32 num_entries;
NvU64 va_size;
NvU64 va_per_entry;
NvU64 physical_address;
NvU64 dma_address;
uvm_mmu_page_table_alloc_t *tree_alloc;

status = uvm_page_tree_init(gpu,
NULL,
UVM_PAGE_TREE_TYPE_KERNEL,
gpu->big_page.internal_size,
uvm_get_page_tree_location(gpu->parent),
uvm_get_page_tree_location(gpu),
&gpu->address_space_tree);
if (status != NV_OK) {
UVM_ERR_PRINT("Initializing the page tree failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1279,12 +1281,17 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
gpu->parent->rm_va_size,
va_per_entry);

tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
tree_alloc = uvm_page_tree_pdb_internal(&gpu->address_space_tree);
if (tree_alloc->addr.aperture == UVM_APERTURE_VID)
physical_address = tree_alloc->addr.address;
else
physical_address = page_to_phys(tree_alloc->handle.page);
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
tree_alloc->addr.address,
physical_address,
num_entries,
tree_alloc->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
gpu_get_internal_pasid(gpu),
&dma_address));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
nvstatusToString(status),
@@ -1292,6 +1299,9 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
return status;
}

if (tree_alloc->addr.aperture == UVM_APERTURE_SYS)
gpu->address_space_tree.pdb_rm_dma_address = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_address);

gpu->rm_address_space_moved_to_page_tree = true;

return NV_OK;
@@ -1404,13 +1414,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
parent_gpu->egm.enabled = gpu_info->egmEnabled;
parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
parent_gpu->egm.base_address = gpu_info->egmBaseAddr;
parent_gpu->access_counters_supported = (gpu_info->accessCntrBufferCount != 0);

status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(parent_gpu->rm_device, &fb_info));
if (status != NV_OK)
return status;

parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);

if (!fb_info.bZeroFb)
parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;

@@ -1559,6 +1568,12 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}

status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
if (status != NV_OK) {
UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}

uvm_pmm_gpu_device_p2p_init(gpu);

status = init_semaphore_pools(gpu);
@@ -1644,19 +1659,12 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,

// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);

if (access_counters->rm_info.accessCntrBufferHandle != 0) {
uvm_access_counters_isr_lock(access_counters);
status = uvm_tracker_wait(&access_counters->clear_tracker);
uvm_access_counters_isr_unlock(access_counters);

if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}

@@ -1757,6 +1765,8 @@ static void deinit_gpu(uvm_gpu_t *gpu)

uvm_pmm_gpu_device_p2p_deinit(gpu);

uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);

uvm_pmm_gpu_deinit(&gpu->pmm);

if (gpu->rm_address_space != 0)
@@ -1787,6 +1797,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);

uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);

uvm_kvfree(parent_gpu);
}

@@ -2881,6 +2893,10 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;

// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
if (gpu_info->accessCntrBufferCount > 1)
gpu_info->accessCntrBufferCount = 1;

if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
@@ -3147,12 +3163,15 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return false;
}

uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu)
{
// See comment in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) || g_uvm_global.conf_computing_enabled)
// See comments in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent) || g_uvm_global.conf_computing_enabled)
return UVM_APERTURE_VID;

if (!gpu->mem_info.size)
return UVM_APERTURE_SYS;

return UVM_APERTURE_DEFAULT;
}
@@ -3610,7 +3629,7 @@ static NvU64 gpu_addr_to_dma_addr(uvm_parent_gpu_t *parent_gpu, NvU64 gpu_addr)
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the DMA address we get from the OS.
static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
{
NvU64 gpu_addr = dma_addr - parent_gpu->dma_addressable_start;
UVM_ASSERT(dma_addr >= gpu_addr);
@@ -3618,32 +3637,40 @@ static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
return gpu_addr;
}

void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
static void *parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
{
NvU64 dma_addr;
void *cpu_addr;

cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, &dma_addr, gfp_flags);

if (!cpu_addr)
return cpu_addr;

*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
atomic64_add(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
return cpu_addr;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address)
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
{
void *cpu_addr = parent_gpu_dma_alloc_page(gpu->parent, gfp_flags, dma_address_out);
if (!cpu_addr)
return NV_ERR_NO_MEMORY;

// TODO: Bug 4868590: Issue GPA invalidate here

*cpu_addr_out = cpu_addr;
return NV_OK;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
{
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, va, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, cpu_addr, dma_address);
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}

NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
struct page *page,
size_t size,
NvU64 *dma_address_out)
static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NvU64 dma_addr;

@@ -3666,11 +3693,20 @@ NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
}

atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);

return NV_OK;
}

NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NV_STATUS status = parent_gpu_map_cpu_pages(gpu->parent, page, size, dma_address_out);

// TODO: Bug 4868590: Issue GPA invalidate here

return status;
}

void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
{
UVM_ASSERT(PAGE_ALIGNED(size));

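A hedged usage sketch pairing the reworked allocation and free helpers (signatures as in this hunk; the function name and GFP_KERNEL choice are illustrative):

    // Allocate one coherent DMA page visible to this GPU, touch it from the
    // CPU, then release it. The returned dma_address already accounts for
    // dma_addressable_start.
    static NV_STATUS example_dma_page_roundtrip(uvm_gpu_t *gpu)
    {
        void *cpu_addr;
        NvU64 dma_address;
        NV_STATUS status = uvm_gpu_dma_alloc_page(gpu, GFP_KERNEL, &cpu_addr, &dma_address);

        if (status != NV_OK)
            return status;

        memset(cpu_addr, 0, PAGE_SIZE); // CPU access through the coherent mapping

        uvm_parent_gpu_dma_free_page(gpu->parent, cpu_addr, dma_address);
        return NV_OK;
    }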
@@ -522,10 +522,6 @@ struct uvm_access_counter_buffer_struct
// PCIe
NvU32 cached_put;

// Tracker used to aggregate access counters clear operations, needed for
// GPU removal
uvm_tracker_t clear_tracker;

// Current access counter configuration. During normal operation this
// information is computed once during GPU initialization. However, tests
// may override it to try different configuration values.
@@ -819,6 +815,14 @@ struct uvm_gpu_struct
uvm_bit_locks_t bitlocks;
} sysmem_mappings;

// Reverse lookup table used to query the user mapping associated with a
// sysmem (DMA) physical address.
//
// The system memory mapping information referred to by this field is
// different from that of sysmem_mappings, because it relates to user
// mappings (instead of kernel), and it is used in most configurations.
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;

struct
{
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@@ -993,17 +997,6 @@ struct uvm_parent_gpu_struct
// nvUvmInterfaceUnregisterGpu()).
struct pci_dev *pci_dev;

// NVLINK Processing Unit (NPU) on PowerPC platforms. The NPU is a
// collection of CPU-side PCI devices which bridge GPU NVLINKs and the CPU
// memory bus.
//
// There is one PCI device per NVLINK. A set of NVLINKs connects to a single
// GPU, and all NVLINKs for a given socket are collected logically under
// this UVM NPU because some resources (such as register mappings) are
// shared by all those NVLINKs. This means multiple GPUs may connect to the
// same UVM NPU.
uvm_ibm_npu_t *npu;

// On kernels with NUMA support, this entry contains the closest CPU NUMA
// node to this GPU. Otherwise, the value will be -1.
int closest_cpu_numa_node;
@@ -1026,13 +1019,12 @@ struct uvm_parent_gpu_struct
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the physical address. The DMA mapping helpers like
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
// care of that.
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
NvU64 dma_addressable_start;
NvU64 dma_addressable_limit;

// Total size (in bytes) of physically mapped (with
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
// uvm_gpu_map_cpu_pages) sysmem pages, used for leak detection.
atomic64_t mapped_cpu_pages_size;

// Hardware Abstraction Layer
@@ -1072,6 +1064,11 @@ struct uvm_parent_gpu_struct

bool access_counters_supported;

// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
bool access_counters_can_use_physical_addresses;

bool fault_cancel_va_supported;

// True if the GPU has hardware support for scoped atomics
@@ -1132,6 +1129,13 @@ struct uvm_parent_gpu_struct
// Indicates whether the GPU can map sysmem with pages larger than 4k
bool can_map_sysmem_with_large_pages;

// An integrated GPU has no vidmem and coherent access to sysmem. Note
// integrated GPUs have a write-back L2 cache (cf. discrete GPUs
// write-through cache.)
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
bool is_integrated_gpu;

struct
{
// If true, the granularity of key rotation is a single channel. If
@@ -1205,6 +1209,11 @@ struct uvm_parent_gpu_struct
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;

// Tracker used to aggregate access counters clear operations, needed for
// GPU removal. It is only used when supports_access_counters is set.
uvm_tracker_t access_counters_clear_tracker;
uvm_mutex_t access_counters_clear_tracker_lock;

// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;

@@ -1252,9 +1261,6 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;

// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;

struct
{
bool supported;
@@ -1340,6 +1346,8 @@ struct uvm_parent_gpu_struct
uvm_test_parent_gpu_inject_error_t test;
};

NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr);

static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->name;
@@ -1676,20 +1684,21 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);

// Map size bytes of contiguous sysmem on the GPU for physical access
// Map size bytes of contiguous sysmem on the GPU for physical access.
//
// size has to be aligned to PAGE_SIZE.
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// the GPU. This address is usable by any GPU under the same parent for the
// lifetime of that parent.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);

// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
}

static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
@@ -1700,16 +1709,15 @@ static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dm
// Allocate and map a page of system DMA memory on the GPU for physical access
//
// Returns
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter.
// - the address of allocated memory in CPU virtual address space.
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter. This address is usable by any GPU
// under the same parent for the lifetime of that parent.
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out);

// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// with uvm_gpu_dma_alloc_page().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address);

// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1730,8 +1738,6 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

bool uvm_platform_uses_canonical_form_address(void);

// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@@ -1774,7 +1780,7 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
}

uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu);

// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU

@@ -24,6 +24,7 @@
#include "nv_uvm_interface.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_global.h"
#include "uvm_api.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_kvmalloc.h"
@@ -216,38 +217,19 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
return NV_OK;
}

// Clear the access counter notifications and add it to the per-GPU
// per-notification-buffer clear tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_notifications)
static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu)
{
NvU32 i;
NV_STATUS status;
uvm_push_t push;

status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);

for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);

uvm_push_end(&push);

uvm_tracker_remove_completed(&access_counters->clear_tracker);

return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
return status;
}

// Clear all access counters and add the operation to the per-GPU
// per-notification-buffer clear tracker
// Clear all access counters and add the operation to the per-GPU clear
// tracker.
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buffer_t *access_counters)
{
NV_STATUS status;
@@ -269,8 +251,52 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf

uvm_push_end(&push);

uvm_tracker_remove_completed(&access_counters->clear_tracker);
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);

return status;
}

// Clear the access counter notifications and add it to the per-GPU clear
// tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_notifications)
{
NvU32 i;
NV_STATUS status;
uvm_push_t push;
uvm_access_counter_clear_op_t clear_op;

clear_op = gpu->parent->host_hal->access_counter_query_clear_op(gpu->parent, notification_start, num_notifications);
if (clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_ALL)
return access_counter_clear_all(gpu, access_counters);

UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);

status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}

for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);

uvm_push_end(&push);

uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);

return status;
}

bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
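Both clear paths above repeat the same locked tracker update; as a sketch, the pattern could be factored like this (the helper is hypothetical, every call in it appears verbatim in the hunk):

    // Hypothetical helper capturing the locked tracker-update pattern shared
    // by access_counter_clear_all() and access_counter_clear_notifications().
    static NV_STATUS clear_tracker_add_push(uvm_parent_gpu_t *parent_gpu, uvm_push_t *push)
    {
        NV_STATUS status;

        uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
        uvm_tracker_remove_completed(&parent_gpu->access_counters_clear_tracker);
        status = uvm_tracker_add_push_safe(&parent_gpu->access_counters_clear_tracker, push);
        uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);

        return status;
    }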
@@ -373,8 +399,6 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3
access_counters->notifications_ignored_count = 0;
access_counters->test.reconfiguration_owner = NULL;

uvm_tracker_init(&access_counters->clear_tracker);

access_counters->max_notifications = access_counters->rm_info.bufferSize /
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu);

@@ -442,8 +466,6 @@ void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 n
UVM_ASSERT(status == NV_OK);

access_counters->rm_info.accessCntrBufferHandle = 0;
uvm_tracker_deinit(&access_counters->clear_tracker);

uvm_kvfree(batch_context->notification_cache);
uvm_kvfree(batch_context->notifications);
batch_context->notification_cache = NULL;
@@ -487,7 +509,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, NvU32 index, con
if (status != NV_OK)
goto error;

status = uvm_tracker_wait(&access_counters->clear_tracker);
status = parent_gpu_clear_tracker_wait(gpu->parent);
if (status != NV_OK)
goto error;

@@ -521,7 +543,7 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[index].service_lock));

// Wait for any pending clear operation before releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
status = parent_gpu_clear_tracker_wait(parent_gpu);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());

@@ -1750,28 +1772,21 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS
uvm_va_space_up_read(va_space);

for_each_gpu_in_mask(gpu, retained_gpus) {
NvU32 notif_buf_index;
uvm_access_counter_buffer_t *access_counters;

if (!gpu->parent->access_counters_supported)
continue;

for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
// clear_all affects all the notification buffers, we issue it for
// the notif_buf_index 0.
access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, 0);
status = access_counter_clear_all(gpu, access_counters);
if (status == NV_OK)
status = parent_gpu_clear_tracker_wait(gpu->parent);

// Access counters are not enabled. Nothing to clear.
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count) {
status = access_counter_clear_all(gpu, access_counters);
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
}

uvm_access_counters_isr_unlock(access_counters);

if (status != NV_OK)
break;
}
// Break the loop if clear_all failed in any of the retained gpus.
if (status != NV_OK)
break;
}

for_each_gpu_in_mask(gpu, retained_gpus)
@@ -2054,7 +2069,9 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = NULL;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_access_counter_buffer_t *access_counters;
NvU32 notif_buf_index;
NvBool index0_state;

if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
return NV_ERR_INVALID_ARGUMENT;
@@ -2068,51 +2085,52 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}

for (notif_buf_index = 0;
notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount && status == NV_OK;
notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);

uvm_access_counters_isr_lock(access_counters);
// Access counters not enabled. Nothing to reset
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
goto exit_release_gpu;
}

// Access counters not enabled. Nothing to reset
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0)
goto exit_isr_unlock;
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);

if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
status = access_counter_clear_all(gpu, access_counters);
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;
// Clear operations affect all notification buffers, we use the
// notif_buf_index = 0;
notif_buf_index = 0;
access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, notif_buf_index);

entry.bank = params->bank;
entry.tag = params->tag;
uvm_access_counters_isr_lock(access_counters);

status = access_counter_clear_notifications(gpu, access_counters, &notification, 1);
}
// Recheck access counters are enabled.
index0_state = gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0;
if (index0_state) {
NvU32 i;

if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++)
UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state);

goto exit_isr_unlock;
}

if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
status = access_counter_clear_all(gpu, access_counters);
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;

entry.bank = params->bank;
entry.tag = params->tag;

status = access_counter_clear_notifications(gpu, access_counters, &notification, 1);
}

if (status == NV_OK)
status = parent_gpu_clear_tracker_wait(gpu->parent);

exit_isr_unlock:
uvm_access_counters_isr_unlock(access_counters);

// We only need to clear_all() once.
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
NvU32 i;

// Early exit of the main loop; since we only need to clear_all()
// once. Check that all the remaining notification buffers have
// access counters in same state.
NvBool index0_state = (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0);
for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++)
UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state);

break;
}
}
uvm_access_counters_isr_unlock(access_counters);

exit_release_gpu:
uvm_gpu_release(gpu);

|
||||
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
va_range_next = uvm_va_space_iter_first(va_space, fault_entry->fault_address, ~0ULL);
|
||||
va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_entry->fault_address);
|
||||
|
||||
// The VA isn't managed. See if ATS knows about it.
|
||||
vma = find_vma_intersection(mm, fault_address, fault_address + 1);
|
||||
|
||||
@@ -453,7 +453,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
pdb = uvm_page_tree_pdb(&gpu_va_space->page_tables)->addr;
|
||||
pdb = uvm_page_tree_pdb_address(&gpu_va_space->page_tables);
|
||||
|
||||
// Record fatal fault event
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, fault_entry, fault_entry->fatal_reason);
|
||||
@@ -1964,12 +1964,12 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
|
||||
(*block_faults) = 0;
|
||||
|
||||
va_range_next = uvm_va_space_iter_first(va_space, fault_address, ~0ULL);
|
||||
va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_address);
|
||||
if (va_range_next && (fault_address >= va_range_next->node.start)) {
|
||||
UVM_ASSERT(fault_address < va_range_next->node.end);
|
||||
|
||||
va_range = va_range_next;
|
||||
va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
|
||||
va_range_next = uvm_va_range_gmmu_mappable_next(va_range);
|
||||
}
|
||||
|
||||
if (va_range)
|
||||
|
||||
@@ -792,7 +792,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
//
|
||||
// Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
|
||||
// have been added that are exactly what we need and could be slightly
|
||||
// faster on arm and powerpc than the implementation below. But at least in
|
||||
// faster on arm than the implementation below. But at least in
|
||||
// 4.3 the implementation looks broken for arm32 (it maps directly to
|
||||
// smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
|
||||
// architectures) so instead of dealing with that just use a slightly bigger
|
||||
|
||||
@@ -218,6 +218,7 @@ static uvm_hal_class_ops_t host_table[] =
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
},
@@ -269,6 +270,7 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
.access_counter_query_clear_op = uvm_hal_turing_access_counter_query_clear_op,
}
},
{
@@ -308,12 +310,15 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
}
},
{
.id = BLACKWELL_CHANNEL_GPFIFO_B,
.parent_id = BLACKWELL_CHANNEL_GPFIFO_A,
.u.host_ops = {}
.u.host_ops = {
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb20x
}
},
};

@@ -407,6 +412,32 @@ static uvm_hal_class_ops_t arch_table[] =
},
};

// chip_table[] is different from the other class op tables - it is used to
// apply chip specific overrides to arch ops. This means unlike the other class
// op tables, parent_id does not refer to a preceding entry within the table
// itself. parent_id is an architecture (not a chip id) and instead refers to an
// entry in arch_table[]. This means that arch_table[] must be initialized
// before chip_table[]. chip_table[] must be initialized using
// ops_init_from_table(arch_table) instead of ops_init_from_parent().
// TODO: BUG 5044266: the chip ops should be separated from the arch ops.
static uvm_hal_class_ops_t chip_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
.u.arch_ops = {
.mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
.u.arch_ops = {
.mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
}
},
};

static uvm_hal_class_ops_t fault_buffer_table[] =
{
{
@@ -670,33 +701,35 @@ static inline void op_copy(uvm_hal_class_ops_t *dst, uvm_hal_class_ops_t *src, N
memcpy(m_dst, m_src, sizeof(void *));
}

static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
NvU32 row_count,
NvLength op_count,
NvLength op_offset)
static inline NV_STATUS ops_init_from_table(uvm_hal_class_ops_t *dest_table,
NvU32 dest_row_count,
uvm_hal_class_ops_t *src_table,
NvU32 src_row_count,
NvLength op_count,
NvLength op_offset)
{
NvLength i;

for (i = 0; i < row_count; i++) {
for (i = 0; i < dest_row_count; i++) {
NvLength j;
uvm_hal_class_ops_t *parent = NULL;

if (table[i].parent_id != 0) {
parent = ops_find_by_id(table, i, table[i].parent_id);
if (dest_table[i].parent_id != 0) {
parent = ops_find_by_id(src_table, src_row_count, dest_table[i].parent_id);
if (parent == NULL)
return NV_ERR_INVALID_CLASS;

// Go through all the ops and assign from parent's corresponding op
// if NULL
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
op_copy(table + i, parent, j, op_offset);
if (op_is_null(dest_table + i, j, op_offset))
op_copy(dest_table + i, parent, j, op_offset);
}
}

// At this point, it is an error to have missing HAL operations
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
if (op_is_null(dest_table + i, j, op_offset))
return NV_ERR_INVALID_STATE;
}
}
@@ -704,6 +737,19 @@ static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
return NV_OK;
}

static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
NvU32 row_count,
NvLength op_count,
NvLength op_offset)
{
return ops_init_from_table(table,
row_count,
table,
row_count,
op_count,
op_offset);
}

NV_STATUS uvm_hal_init_table(void)
{
NV_STATUS status;
@@ -732,6 +778,18 @@ NV_STATUS uvm_hal_init_table(void)
return status;
}

// chip_table[] must be initialized after arch_table[].
status = ops_init_from_table(chip_table,
ARRAY_SIZE(chip_table),
arch_table,
ARRAY_SIZE(arch_table),
ARCH_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.arch_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_table(chip_table) failed: %s\n", nvstatusToString(status));
return status;
}

status = ops_init_from_parent(fault_buffer_table,
ARRAY_SIZE(fault_buffer_table),
FAULT_BUFFER_OP_COUNT,
@@ -797,6 +855,13 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)

parent_gpu->arch_hal = &class_ops->u.arch_ops;

// Apply per chip overrides if required
class_ops = ops_find_by_id(chip_table,
ARRAY_SIZE(chip_table),
gpu_info->gpuArch | gpu_info->gpuImplementation);
if (class_ops)
parent_gpu->arch_hal = &class_ops->u.arch_ops;
class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n",
|
||||
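Note: the hunk above resolves a chip-specific override by composing the lookup id from the architecture and implementation fields, and keeps the arch defaults when no entry matches. A hedged sketch of that lookup — the ids and the single string-valued op are stand-ins for the NV2080_CTRL_MC_ARCH_INFO_* values and the arch_ops struct:

```c
#include <stdio.h>

// Hypothetical id values for illustration only.
enum { ARCH_GB100 = 0x190, IMPL_GB10B = 0x01 };

typedef struct {
    unsigned id;
    const char *mmu_mode; // stand-in for the arch_ops function pointers
} chip_override_t;

static const chip_override_t chip_table[] = {
    { ARCH_GB100 | IMPL_GB10B, "blackwell_integrated" },
};

// Returns the override for arch|impl, or NULL to keep the arch default.
static const chip_override_t *find_override(unsigned arch, unsigned impl)
{
    size_t i;
    for (i = 0; i < sizeof(chip_table) / sizeof(chip_table[0]); i++) {
        if (chip_table[i].id == (arch | impl))
            return &chip_table[i];
    }
    return NULL;
}

int main(void)
{
    const chip_override_t *ovr = find_override(ARCH_GB100, IMPL_GB10B);
    printf("mmu mode: %s\n", ovr ? ovr->mmu_mode : "arch default");
    return 0;
}
```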
@@ -840,6 +905,12 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
     // TODO: Bug 200692962: Add support for access counters in vGPU
     if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
         parent_gpu->access_counters_supported = false;
+
+    // TODO: Bug 4637114: [UVM] Remove support for physical access counter
+    // notifications. Always set to false, until we remove the PMM reverse
+    // mapping code.
+    parent_gpu->access_counters_can_use_physical_addresses = false;
 }

 void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)

@@ -494,6 +494,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size);

 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
@@ -703,6 +704,10 @@ typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *pa
 typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
 typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
                                                         const uvm_access_counter_buffer_entry_t *buffer_entry);
+typedef uvm_access_counter_clear_op_t
+    (*uvm_hal_access_counter_query_clear_op_t)(uvm_parent_gpu_t *parent_gpu,
+                                               uvm_access_counter_buffer_entry_t **buffer_entries,
+                                               NvU32 num_entries);

 void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
 void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
@@ -719,6 +724,10 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
 void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
 void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                                const uvm_access_counter_buffer_entry_t *buffer_entry);
+uvm_access_counter_clear_op_t
+    uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu,
+                                                              uvm_access_counter_buffer_entry_t **buffer_entries,
+                                                              NvU32 num_entries);

 void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
 void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
@@ -732,6 +741,18 @@ NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_g
 void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
 void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
                                                   const uvm_access_counter_buffer_entry_t *buffer_entry);
+uvm_access_counter_clear_op_t
+    uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu,
+                                                 uvm_access_counter_buffer_entry_t **buffer_entries,
+                                                 NvU32 num_entries);
+uvm_access_counter_clear_op_t
+    uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
+                                                          uvm_access_counter_buffer_entry_t **buffer_entries,
+                                                          NvU32 num_entries);
+uvm_access_counter_clear_op_t
+    uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu,
+                                                          uvm_access_counter_buffer_entry_t **buffer_entries,
+                                                          NvU32 num_entries);

 // The source and destination addresses must be 16-byte aligned. Note that the
 // best performance is achieved with 256-byte alignment. The decrypt size must
@@ -785,6 +806,7 @@ struct uvm_host_hal_struct
     uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
     uvm_hal_access_counter_clear_all_t access_counter_clear_all;
     uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
+    uvm_hal_access_counter_query_clear_op_t access_counter_query_clear_op;
     uvm_hal_get_time_t get_time;
 };

@@ -863,7 +885,8 @@ struct uvm_sec2_hal_struct

 typedef struct
 {
-    // id is either a hardware class or GPU architecture
+    // TODO: BUG 5044266: the chip ops should be separated from the arch ops.
+    // id is either a hardware class, a chip or a GPU architecture
     NvU32 id;
     NvU32 parent_id;
     union
@@ -874,7 +897,7 @@ typedef struct
         // ce_ops: id is a hardware class
         uvm_ce_hal_t ce_ops;

-        // arch_ops: id is an architecture
+        // arch_ops: id is an architecture or a chip
         uvm_arch_hal_t arch_ops;

         // fault_buffer_ops: id is an architecture

@@ -471,6 +471,13 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
     return max(membar_1, membar_2);
 }

+typedef enum
+{
+    UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0,
+    UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED,
+    UVM_ACCESS_COUNTER_CLEAR_OP_ALL
+} uvm_access_counter_clear_op_t;
+
 struct uvm_access_counter_buffer_entry_struct
 {
     // Address of the region for which a notification was sent
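Note: the new uvm_access_counter_clear_op_t lets a HAL report whether a batch of notifications needs no clear, per-entry targeted clears, or one global clear. How the driver chooses between them is not visible in this diff; the sketch below is only a plausible batch-size policy to illustrate how such an enum might be consumed:

```c
#include <stdio.h>

// Mirrors the shape of the enum added above; the policy is an assumption
// for illustration, not driver code.
typedef enum {
    CLEAR_OP_NONE = 0,  // notifications need no explicit clear
    CLEAR_OP_TARGETED,  // clear only the notified entries
    CLEAR_OP_ALL        // one global clear is cheaper past some batch size
} clear_op_t;

// Hypothetical policy: per-entry clears scale linearly, a full clear is a
// single method, so switch to ALL for large batches.
static clear_op_t query_clear_op(unsigned num_entries)
{
    const unsigned full_clear_threshold = 64; // made-up tuning constant

    if (num_entries == 0)
        return CLEAR_OP_NONE;
    return num_entries < full_clear_threshold ? CLEAR_OP_TARGETED : CLEAR_OP_ALL;
}

int main(void)
{
    printf("%d %d %d\n", query_clear_op(0), query_clear_op(8), query_clear_op(128));
    return 0;
}
```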
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2025 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation

     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -34,8 +34,9 @@ MODULE_PARM_DESC(uvm_disable_hmm,
                  "enabled if is not supported in this driver build "
                  "configuration, or if ATS settings conflict with HMM.");
 #else
-// So far, we've only tested HMM on x86_64, so disable it by default everywhere
-// else.
+// TODO: Bug 4103580: UVM: HMM: implement HMM support on ARM64 (aarch64)
+// So far, we've only tested HMM on x86_64 and aarch64 and it is broken on
+// aarch64 so disable it by default everywhere except x86_64.
 static bool uvm_disable_hmm = true;
 MODULE_PARM_DESC(uvm_disable_hmm,
                  "Force-disable HMM functionality in the UVM driver. "
@@ -186,7 +187,7 @@ static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_pa
     if (status != NV_OK)
         goto out;

-    status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
+    status = uvm_gpu_map_cpu_page(gpu, dst_page, &dma_addr);
     if (status != NV_OK)
         goto out_unmap_gpu;

@@ -1602,7 +1603,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
         return status;
     }

-    status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
+    status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
     if (status != NV_OK) {
         uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
         uvm_cpu_chunk_free(chunk);
@@ -1991,7 +1992,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,

     dpage = pfn_to_page(pfn);
     UVM_ASSERT(is_device_private_page(dpage));
-    UVM_ASSERT(dpage->pgmap->owner == &g_uvm_global);
+    UVM_ASSERT(page_pgmap(dpage)->owner == &g_uvm_global);

     hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk);
     UVM_ASSERT(!page_count(dpage));
@@ -2437,6 +2438,39 @@ static void hmm_release_atomic_pages(uvm_va_block_t *va_block,
     }
 }

+static int hmm_make_device_exclusive_range(struct mm_struct *mm,
+                                           unsigned long start,
+                                           unsigned long end,
+                                           struct page **pages)
+{
+#if NV_IS_EXPORT_SYMBOL_PRESENT_make_device_exclusive
+    unsigned long addr;
+    int npages = 0;
+
+    for (addr = start; addr < end; addr += PAGE_SIZE) {
+        struct folio *folio;
+        struct page *page;
+
+        page = make_device_exclusive(mm, addr, &g_uvm_global, &folio);
+        if (IS_ERR(page)) {
+            while (npages) {
+                page = pages[--npages];
+                unlock_page(page);
+                put_page(page);
+            }
+
+            npages = PTR_ERR(page);
+            break;
+        }
+
+        pages[npages++] = page;
+    }
+
+    return npages;
+#else
+    return make_device_exclusive_range(mm, start, end, pages, &g_uvm_global);
+#endif
+}
+
 static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
                                                uvm_va_block_t *va_block,
                                                uvm_va_block_retry_t *va_block_retry,
@@ -2490,11 +2524,10 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,

     uvm_mutex_unlock(&va_block->lock);

-    npages = make_device_exclusive_range(service_context->block_context->mm,
+    npages = hmm_make_device_exclusive_range(service_context->block_context->mm,
                                          uvm_va_block_cpu_page_address(va_block, region.first),
                                          uvm_va_block_cpu_page_address(va_block, region.outer - 1) + PAGE_SIZE,
-                                         pages + region.first,
-                                         &g_uvm_global);
+                                         pages + region.first);

     uvm_mutex_lock(&va_block->lock);
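Note: the compat wrapper above acquires device-exclusive access page by page and, on the first failure, unwinds every page it already locked before propagating the error. That acquire-all-or-roll-back shape is generic; here is a standalone sketch with invented resource functions:

```c
#include <stdio.h>
#include <stdlib.h>

// Simulated per-item acquire: fails at index 3 to exercise the rollback.
static int acquire(int idx, int **out)
{
    if (idx == 3)
        return -1; // simulate a mid-batch failure
    *out = malloc(sizeof(int));
    return *out ? 0 : -1;
}

// Acquire count resources or none: on the first failure, release everything
// acquired so far (in reverse order) and return the error, so the caller
// never sees partial state.
static int acquire_range(int count, int **res)
{
    int n = 0;

    while (n < count) {
        int err = acquire(n, &res[n]);
        if (err) {
            while (n)
                free(res[--n]);
            return err;
        }
        n++;
    }
    return n; // number acquired, like the wrapper's npages
}

int main(void)
{
    int *res[8];
    printf("acquired: %d\n", acquire_range(8, res)); // prints -1
    return 0;
}
```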
@@ -95,8 +95,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

     parent_gpu->non_replayable_faults_supported = true;

-    parent_gpu->access_counters_supported = true;
-
     parent_gpu->fault_cancel_va_supported = true;

     parent_gpu->scoped_atomics_supported = true;

@@ -393,9 +393,13 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
 bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
     uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);

-    if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
+    if (uvm_gpu_address_is_peer(gpu, src)) {
+        UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
+        return false;
+    }
+
+    if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
         UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
                       src.address,
                       dst.address);
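Note: one side of the hunk above folds the peer-address checks into a single peer_copy predicate. A reduced model of that combined check, with stand-in types for uvm_gpu_address_is_peer() and uvm_channel_is_p2p():

```c
#include <stdbool.h>
#include <stdio.h>

typedef struct { bool is_peer; unsigned long long address; } gpu_address_t;
typedef struct { bool is_p2p; } channel_t;

// A copy touching a peer GPU's memory must run on a channel reserved for
// p2p work; everything else is allowed.
static bool memcopy_is_valid(const channel_t *ch, gpu_address_t dst, gpu_address_t src)
{
    bool peer_copy = dst.is_peer || src.is_peer;

    if (ch && peer_copy && !ch->is_p2p) {
        fprintf(stderr,
                "Peer copy 0x%llx -> 0x%llx should use designated p2p channels!\n",
                src.address, dst.address);
        return false;
    }
    return true;
}

int main(void)
{
    channel_t plain = { .is_p2p = false };
    gpu_address_t dst = { .is_peer = true, .address = 0x2000 };
    gpu_address_t src = { .is_peer = false, .address = 0x1000 };

    printf("valid: %d\n", memcopy_is_valid(&plain, dst, src)); // prints 0
    return 0;
}
```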
@@ -212,7 +212,13 @@ static NvU64 make_pte_hopper(uvm_aperture_t aperture, NvU64 address, uvm_prot_t

 static NvU64 make_sked_reflected_pte_hopper(void)
 {
+    // On discrete GPUs, SKED Reflected PTEs may use either the local aperture
+    // or the system non coherent aperture. However, integrated GPUs may only
+    // use the system non-coherent aperture. We always use the system
+    // non-coherent aperture as that is common to both discrete and integrated
+    // GPUs.
     return HWCONST64(_MMU_VER3, PTE, VALID, TRUE) |
            HWCONST64(_MMU_VER3, PTE, APERTURE, SYSTEM_NON_COHERENT_MEMORY) |
            HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(UVM_PROT_READ_WRITE_ATOMIC, UVM_MMU_PTE_FLAGS_NONE)) |
            HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
 }
@@ -323,11 +329,6 @@ static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir,
     if (!g_uvm_global.ats.enabled)
         return pcf[pde_type][ATS_ALLOWED];

-    // We assume all supported ATS platforms use canonical form address.
-    // See comments in uvm_gpu.c:uvm_gpu_can_address() and in
-    // uvm_mmu.c:page_tree_ats_init();
-    UVM_ASSERT(uvm_platform_uses_canonical_form_address());
-
     // Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
     // ATS and GMMU page tables. For managed memory we need to prevent this
     // parallel lookup since we would not get any GPU fault if the CPU has

@@ -526,25 +526,6 @@ typedef struct
     NV_STATUS rmStatus; // OUT
 } UVM_MEM_MAP_PARAMS;

-//
-// UvmDebugAccessMemory
-//
-#define UVM_DEBUG_ACCESS_MEMORY UVM_IOCTL_BASE(36)
-
-typedef struct
-{
-#ifdef __linux__
-    NvS32 sessionIndex; // IN
-#endif
-    NvU64 baseAddress NV_ALIGN_BYTES(8); // IN
-    NvU64 sizeInBytes NV_ALIGN_BYTES(8); // IN
-    NvU32 accessType; // IN (UvmDebugAccessType)
-    NvU64 buffer NV_ALIGN_BYTES(8); // IN/OUT
-    NvBool isBitmaskSet; // OUT
-    NvU64 bitmask NV_ALIGN_BYTES(8); // IN/OUT
-    NV_STATUS rmStatus; // OUT
-} UVM_DEBUG_ACCESS_MEMORY_PARAMS;
-
 //
 // UvmRegisterGpu
 //
@@ -1009,20 +990,35 @@ typedef struct
 //
 #define UVM_POPULATE_PAGEABLE UVM_IOCTL_BASE(71)

-// Allow population of managed ranges.
-//
-// The UVM driver must have builtin tests enabled for the API to use the
-// following two flags.
+// Allow population of managed ranges. The goal is to validate that it is
+// possible to populate pageable ranges backed by VMAs with the VM_MIXEDMAP or
+// VM_DONTEXPAND special flags set. But since there is no portable way to force
+// allocation of such memory from user space, and it is not safe to change the
+// flags of an already-created VMA from kernel space, we take advantage of the
+// fact that managed ranges have both special flags set at creation time (see
+// uvm_mmap).
 #define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED   0x00000001

 // By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
 // does not have read permission. This flag skips that check.
 #define UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK 0x00000002

-#define UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
+// By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
+// is VM_IO or VM_PFNMAP. This flag skips that check.
+#define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL   0x00000004
+
+// These flags are used internally within the driver and are not allowed from
+// user space.
+#define UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL
+
+// These flags are allowed from user space only when builtin tests are enabled.
+// Some of them may also be used internally within the driver in non-test use
+// cases.
+#define UVM_POPULATE_PAGEABLE_FLAGS_TEST     (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
                                               UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)

-#define UVM_POPULATE_PAGEABLE_FLAGS_ALL UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL
+#define UVM_POPULATE_PAGEABLE_FLAGS_ALL      (UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL | \
+                                              UVM_POPULATE_PAGEABLE_FLAGS_TEST)

 typedef struct
 {
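Note: the reworked masks split UVM_POPULATE_PAGEABLE flags into internal-only and test-only groups. A sketch of how user-supplied flags might be vetted against masks like these — the check function itself is an assumption, not driver code:

```c
#include <stdio.h>

#define FLAG_ALLOW_MANAGED   0x00000001u
#define FLAG_SKIP_PROT_CHECK 0x00000002u
#define FLAG_ALLOW_SPECIAL   0x00000004u

#define FLAGS_INTERNAL FLAG_ALLOW_SPECIAL
#define FLAGS_TEST     (FLAG_ALLOW_MANAGED | FLAG_SKIP_PROT_CHECK)
#define FLAGS_ALL      (FLAGS_INTERNAL | FLAGS_TEST)

// Hypothetical ioctl-entry validation: unknown bits are rejected, internal
// flags are never accepted from user space, and test flags require builtin
// tests to be enabled.
static int check_user_flags(unsigned flags, int tests_enabled)
{
    if (flags & ~FLAGS_ALL)
        return -1; // unknown bits
    if (flags & FLAGS_INTERNAL)
        return -1; // never valid from user space
    if ((flags & FLAGS_TEST) && !tests_enabled)
        return -1; // test-only flag without builtin tests
    return 0;
}

int main(void)
{
    printf("%d\n", check_user_flags(FLAG_ALLOW_MANAGED, 1)); // prints 0
    printf("%d\n", check_user_flags(FLAG_ALLOW_SPECIAL, 1)); // prints -1
    return 0;
}
```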
@@ -1142,7 +1138,6 @@ typedef struct
     NV_STATUS rmStatus; // OUT
 } UVM_IS_8_SUPPORTED_PARAMS;

-
 #ifdef __cplusplus
 }
 #endif

@@ -76,14 +76,16 @@
 #include <linux/ratelimit.h>
 #endif

-#if defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT)
-#include <asm/powernv.h>
-#endif
-
 #if defined(NV_LINUX_SCHED_TASK_STACK_H_PRESENT)
 #include <linux/sched/task_stack.h>
 #endif

 #include "linux/bitmap.h"
 #include "linux/bitops.h"
 #include "linux/gfp.h"
+#include "linux/pagemap.h"
+#include "linux/types.h"
+
+#if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
+#include <linux/scatterlist.h>
+#endif
@@ -425,4 +427,8 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
 #define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 1
 #endif

+#ifndef NV_PAGE_PGMAP_PRESENT
+#define page_pgmap(page) (page)->pgmap
+#endif
+
 #endif // _UVM_LINUX_H
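Note: the page_pgmap() fallback above is the driver's usual conftest pattern — when the running kernel does not provide an accessor, define a same-named macro over the old field layout so call sites stay uniform. A generic standalone illustration with hypothetical names:

```c
#include <stdio.h>

struct pgmap { int owner; };
struct page { struct pgmap *pgmap; };

// Imagine HAVE_PAGE_PGMAP is set by a configure-time probe when the
// kernel already defines page_pgmap(); otherwise we supply it ourselves.
#ifndef HAVE_PAGE_PGMAP
#define page_pgmap(p) ((p)->pgmap)
#endif

int main(void)
{
    struct pgmap map = { .owner = 42 };
    struct page pg = { .pgmap = &map };

    // Callers use the accessor everywhere; only the macro knows the layout.
    printf("owner: %d\n", page_pgmap(&pg)->owner);
    return 0;
}
```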
@@ -27,7 +27,7 @@

 const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
 {
-    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
+    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 38);

     switch (lock_order) {
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@@ -58,6 +58,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
+        UVM_ENUM_STRING_CASE(UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_WLC_CHANNEL);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
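Note: the BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 38) bump is a compile-time tripwire — adding an enum value forces whoever adds it to revisit this function and extend the switch. A minimal userspace analogue using C11 _Static_assert (the enum and count are invented):

```c
#include <stdio.h>

typedef enum {
    ORDER_INVALID,
    ORDER_GLOBAL,
    ORDER_CHANNEL,
    ORDER_LEAF,
    ORDER_COUNT
} order_t;

static const char *order_to_string(order_t o)
{
    // Fails to compile when someone adds an enum value without visiting
    // this function, mirroring the BUILD_BUG_ON above.
    _Static_assert(ORDER_COUNT == 4, "update order_to_string()");

    switch (o) {
        case ORDER_INVALID: return "invalid";
        case ORDER_GLOBAL:  return "global";
        case ORDER_CHANNEL: return "channel";
        case ORDER_LEAF:    return "leaf";
        default:            return "?";
    }
}

int main(void)
{
    printf("%s\n", order_to_string(ORDER_CHANNEL));
    return 0;
}
```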
@@ -352,15 +353,7 @@ bool __uvm_thread_check_all_unlocked(void)

 NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order)
 {
-    // TODO: Bug 1772140: Notably bit locks currently do not work on memory
-    // allocated through vmalloc() (including big allocations created with
-    // uvm_kvmalloc()). The problem is the bit_waitqueue() helper used by the
-    // kernel internally that uses virt_to_page().
-    // To prevent us from using kmalloc() for a huge allocation, warn if the
-    // allocation size gets bigger than what we are comfortable with for
-    // kmalloc() in uvm_kvmalloc().
     size_t size = sizeof(unsigned long) * BITS_TO_LONGS(count);
-    WARN_ON_ONCE(size > UVM_KMALLOC_THRESHOLD);

     bit_locks->bits = kzalloc(size, NV_UVM_GFP_FLAGS);
     if (!bit_locks->bits)
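Note: uvm_bit_locks_init() sizes its allocation as one lock bit per object, rounded up to whole unsigned longs. The arithmetic, checked standalone:

```c
#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG    (CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
    size_t count = 1000; // e.g. one bit lock per root chunk

    // 1000 bits -> 16 longs -> 128 bytes on an LP64 system
    printf("%zu longs, %zu bytes\n",
           BITS_TO_LONGS(count),
           sizeof(unsigned long) * BITS_TO_LONGS(count));
    return 0;
}
```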
@@ -432,6 +432,11 @@
 //   Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
 //   Exclusive bitlock (mutex) per each root chunk internal to PMM.
 //
+// - Access counters clear operations
+//   Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
+//
+//   It protects the parent_gpu's access counters clear tracker.
+//
 // - Channel lock
 //   Order: UVM_LOCK_ORDER_CHANNEL
 //   Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
@@ -477,7 +482,7 @@
 //
 //   CE semaphore payloads are encrypted, and require to take the CSL lock
 //   (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
-
+//
 // - CSL Context
 //   Order: UVM_LOCK_ORDER_CSL_CTX
 //   When the Confidential Computing feature is enabled, encrypt/decrypt
@@ -523,6 +528,7 @@ typedef enum
     UVM_LOCK_ORDER_PMM,
     UVM_LOCK_ORDER_PMM_PMA,
     UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
+    UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
     UVM_LOCK_ORDER_CHANNEL,
     UVM_LOCK_ORDER_WLC_CHANNEL,
     UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,

@@ -947,13 +947,15 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_external_t *ext
         goto error;
     }

-    // Check for the maximum page size for the mapping of vidmem allocations,
-    // the vMMU segment size may limit the range of page sizes.
-    biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
-                                                                mapping_gpu->mem_info.max_vidmem_page_size);
-    if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
-        (mapping_page_size > biggest_mapping_page_size))
-        mapping_page_size = biggest_mapping_page_size;
+    if (mapping_gpu->mem_info.size) {
+        // Check for the maximum page size for the mapping of vidmem
+        // allocations, the vMMU segment size may limit the range of page sizes.
+        biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
+                                                                    mapping_gpu->mem_info.max_vidmem_page_size);
+        if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
+            (mapping_page_size > biggest_mapping_page_size))
+            mapping_page_size = biggest_mapping_page_size;
+    }

     mem_info.pageSize = mapping_page_size;
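Note: the hunk above clamps the requested mapping page size to the largest page size the vMMU segment size allows, and one side of the diff skips the clamp entirely when the mapping GPU reports no vidmem (mem_info.size == 0). A standalone sketch of the clamp with invented sizes:

```c
#include <stdio.h>

// Largest supported page size <= limit, from a fixed set of page sizes
// (4K/64K/2M/512M here; the real set comes from the GPU's page tables).
static unsigned long long biggest_page_size_up_to(unsigned long long limit)
{
    static const unsigned long long sizes[] = {
        4ull << 10, 64ull << 10, 2ull << 20, 512ull << 20
    };
    unsigned long long best = 0;
    size_t i;

    for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        if (sizes[i] <= limit)
            best = sizes[i];
    }
    return best;
}

int main(void)
{
    unsigned long long mapping_page_size = 2ull << 20;      // caller wants 2M
    unsigned long long max_vidmem_page_size = 128ull << 10; // vMMU segment cap

    unsigned long long biggest = biggest_page_size_up_to(max_vidmem_page_size);
    if (mapping_page_size > biggest)
        mapping_page_size = biggest;

    printf("mapping page size: %llu KiB\n", mapping_page_size >> 10); // 64 KiB
    return 0;
}
```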
@@ -61,8 +61,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

     parent_gpu->non_replayable_faults_supported = false;

-    parent_gpu->access_counters_supported = false;
-
     parent_gpu->fault_cancel_va_supported = false;

     parent_gpu->scoped_atomics_supported = false;
Some files were not shown because too many files have changed in this diff.