Compare commits

...

7 Commits

Author              SHA1        Message     Date
Maneet Singh        fade1f7b20  575.64      2025-06-16 19:28:19 -07:00
Maneet Singh        30e15d79de  575.57.08   2025-05-29 10:58:21 -07:00
Andy Ritger         e00332b05f  575.51.03   2025-05-01 22:14:31 -07:00
Bernhard Stoeckner  4159579888  575.51.02   2025-04-17 19:35:59 +02:00
Bernhard Stoeckner  e8113f665d  570.133.20  2025-04-17 17:56:49 +02:00
Bernhard Stoeckner  c5e439fea4  570.133.07  2025-03-19 14:13:05 +01:00
Bernhard Stoeckner  25bef4626e  570.124.06  2025-03-03 19:08:20 +01:00
1190 changed files with 312505 additions and 275837 deletions

README.md: 1563 changes (file diff suppressed because it is too large)

View File

@@ -75,21 +75,14 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(src)/$(_module)/$(_module).Kbuild))
#
# Define CFLAGS that apply to all the NVIDIA kernel modules. EXTRA_CFLAGS
# is deprecated since 2.6.24 in favor of ccflags-y, but we need to support
# older kernels which do not have ccflags-y. Newer kernels append
# $(EXTRA_CFLAGS) to ccflags-y for compatibility.
#
EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.04\"
ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"575.64\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
ccflags-y += -I$(SYSSRCHOST1X)
endif
# Some Android kernels prohibit driver use of filesystem functions like
@@ -99,57 +92,57 @@ endif
PLATFORM_IS_ANDROID ?= 0
ifeq ($(PLATFORM_IS_ANDROID),1)
EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0
ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0
else
EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1
ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1
endif
EXTRA_CFLAGS += -Wno-unused-function
ccflags-y += -Wno-unused-function
ifneq ($(NV_BUILD_TYPE),debug)
EXTRA_CFLAGS += -Wuninitialized
ccflags-y += -Wuninitialized
endif
EXTRA_CFLAGS += -fno-strict-aliasing
ccflags-y += -fno-strict-aliasing
ifeq ($(ARCH),arm64)
EXTRA_CFLAGS += -mstrict-align
ccflags-y += -mstrict-align
endif
ifeq ($(NV_BUILD_TYPE),debug)
EXTRA_CFLAGS += -g
ccflags-y += -g
endif
EXTRA_CFLAGS += -ffreestanding
ccflags-y += -ffreestanding
ifeq ($(ARCH),arm64)
EXTRA_CFLAGS += -mgeneral-regs-only -march=armv8-a
EXTRA_CFLAGS += $(call cc-option,-mno-outline-atomics,)
ccflags-y += -mgeneral-regs-only -march=armv8-a
ccflags-y += $(call cc-option,-mno-outline-atomics,)
endif
ifeq ($(ARCH),x86_64)
EXTRA_CFLAGS += -mno-red-zone -mcmodel=kernel
ccflags-y += -mno-red-zone -mcmodel=kernel
endif
ifeq ($(ARCH),powerpc)
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
ccflags-y += -mlittle-endian -mno-strict-align
endif
EXTRA_CFLAGS += -DNV_UVM_ENABLE
EXTRA_CFLAGS += $(call cc-option,-Werror=undef,)
EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER
ccflags-y += -DNV_UVM_ENABLE
ccflags-y += $(call cc-option,-Werror=undef,)
ccflags-y += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
ccflags-y += -DNV_KERNEL_INTERFACE_LAYER
#
# Detect SGI UV systems and apply system-specific optimizations.
#
ifneq ($(wildcard /proc/sgi_uv),)
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
ccflags-y += -DNV_CONFIG_X86_UV
endif
ifdef VGX_FORCE_VFIO_PCI_CORE
EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
ccflags-y += -DNV_VGPU_FORCE_VFIO_PCI_CORE
endif
WARNINGS_AS_ERRORS ?=
@@ -183,7 +176,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(ccflags-y) -fno-pie
NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)

View File

@@ -71,6 +71,31 @@ else
CC ?= cc
LD ?= ld
OBJDUMP ?= objdump
AWK ?= awk
# Bake the following awk program in a string. The program is needed to add C++
# to the languages excluded from BTF generation.
#
# Also, unconditionally return success (0) from the awk program, rather than
# propagating pahole's return status (with 'exit system(pahole_cmd)'), to
work around a DW_TAG_rvalue_reference_type error in
# kernel/nvidia-modeset.ko.
#
# BEGIN {
# pahole_cmd = "pahole"
# for (i = 1; i < ARGC; i++) {
# if (ARGV[i] ~ /--lang_exclude=/) {
# pahole_cmd = pahole_cmd sprintf(" %s,c++", ARGV[i])
# } else {
# pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i])
# }
# }
# system(pahole_cmd)
# }
PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); }
# If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and
# PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the
# empty string to PAHOLE_VARIABLES.
PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'")
ifndef ARCH
ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \
@@ -86,7 +111,7 @@ else
ifneq ($(filter $(ARCH),i386 x86_64),)
KERNEL_ARCH = x86
else
ifeq ($(filter $(ARCH),arm64 powerpc),)
ifeq ($(filter $(ARCH),arm64 powerpc riscv),)
$(error Unsupported architecture $(ARCH))
endif
endif
@@ -112,7 +137,8 @@ else
.PHONY: modules module clean clean_conftest modules_install
modules clean modules_install:
@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" $(KBUILD_PARAMS) $@
@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \
$(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@
@if [ "$@" = "modules" ]; then \
for module in $(NV_KERNEL_MODULES); do \
if [ -x split-object-file.sh ]; then \

View File

@@ -0,0 +1,35 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _OS_DCE_CLIENT_IPC_H_
#define _OS_DCE_CLIENT_IPC_H_
// RM IPC Client Types
#define DCE_CLIENT_RM_IPC_TYPE_SYNC 0x0
#define DCE_CLIENT_RM_IPC_TYPE_EVENT 0x1
#define DCE_CLIENT_RM_IPC_TYPE_MAX 0x2
void dceclientHandleAsyncRpcCallback(NvU32 handle, NvU32 interfaceType,
NvU32 msgLength, void *data,
void *usrCtx);
#endif

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,8 +36,7 @@
#include "nv-timer.h"
#include "nv-time.h"
#include "nv-chardev-numbers.h"
#define NV_KERNEL_NAME "Linux"
#include "nv-platform.h"
#ifndef AUTOCONF_INCLUDED
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
@@ -239,7 +238,7 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
#undef NV_SET_PAGES_UC_PRESENT
#endif
#if !defined(NVCPU_AARCH64) && !defined(NVCPU_PPC64LE) && !defined(NVCPU_RISCV64)
#if !defined(NVCPU_AARCH64) && !defined(NVCPU_RISCV64)
#if !defined(NV_SET_MEMORY_UC_PRESENT) && !defined(NV_SET_PAGES_UC_PRESENT)
#error "This driver requires the ability to change memory types!"
#endif
@@ -345,8 +344,6 @@ extern int nv_pat_mode;
#define NV_PAGE_COUNT(page) \
((unsigned int)page_count(page))
#define NV_GET_PAGE_COUNT(page_ptr) \
(NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)))
#define NV_GET_PAGE_FLAGS(page_ptr) \
(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
@@ -405,7 +402,7 @@ typedef enum
NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
} nv_memory_type_t;
#if defined(NVCPU_AARCH64) || defined(NVCPU_PPC64LE) || defined(NVCPU_RISCV64)
#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
#define NV_ALLOW_WRITE_COMBINING(mt) 1
#elif defined(NVCPU_X86_64)
#if defined(NV_ENABLE_PAT_SUPPORT)
@@ -463,10 +460,7 @@ static inline void *nv_vmalloc(unsigned long size)
#else
void *ptr = __vmalloc(size, GFP_KERNEL);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
@@ -483,10 +477,7 @@ static inline void *nv_ioremap(NvU64 phys, NvU64 size)
#else
void *ptr = ioremap(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
@@ -502,29 +493,12 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#elif defined(NVCPU_PPC64LE)
//
// ioremap_cache() has been only implemented correctly for ppc64le with
// commit f855b2f544d6 in April 2017 (kernel 4.12+). Internally, the kernel
// does provide a default implementation of ioremap_cache() that would be
// incorrect for our use (creating an uncached mapping) before the
// referenced commit, but that implementation is not exported and the
// NV_IOREMAP_CACHE_PRESENT conftest doesn't pick it up, and we end up in
// this #elif branch.
//
// At the same time, ppc64le have supported ioremap_prot() since May 2011
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
// support on power.
//
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
#else
return nv_ioremap(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
@@ -539,10 +513,8 @@ static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
return nv_ioremap_nocache(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
@@ -562,22 +534,19 @@ static NvBool nv_numa_node_has_memory(int node_id)
#define NV_KMALLOC(ptr, size) \
{ \
(ptr) = kmalloc(size, NV_GFP_KERNEL); \
if (ptr) \
NV_MEMDBG_ADD(ptr, size); \
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KZALLOC(ptr, size) \
{ \
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
if (ptr) \
NV_MEMDBG_ADD(ptr, size); \
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KMALLOC_ATOMIC(ptr, size) \
{ \
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
if (ptr) \
NV_MEMDBG_ADD(ptr, size); \
NV_MEMDBG_ADD(ptr, size); \
}
#if defined(__GFP_RETRY_MAYFAIL)
@@ -591,8 +560,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
#define NV_KMALLOC_NO_OOM(ptr, size) \
{ \
(ptr) = kmalloc(size, NV_GFP_NO_OOM); \
if (ptr) \
NV_MEMDBG_ADD(ptr, size); \
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KFREE(ptr, size) \
@@ -625,9 +593,9 @@ static inline pgprot_t nv_sme_clr(pgprot_t prot)
#endif // __sme_clr
}
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
pgprot_t prot = __pgprot(pgprot_val(vm_prot));
#if defined(pgprot_decrypted)
return pgprot_decrypted(prot);
@@ -648,41 +616,6 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
#endif
#endif
static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
NvBool cached, NvBool unencrypted)
{
void *ptr;
pgprot_t prot = PAGE_KERNEL;
#if defined(NVCPU_X86_64)
#if defined(PAGE_KERNEL_NOENC)
if (unencrypted)
{
prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
}
else
#endif
{
prot = cached ? PAGE_KERNEL : PAGE_KERNEL_NOCACHE;
}
#elif defined(NVCPU_AARCH64)
prot = cached ? PAGE_KERNEL : NV_PGPROT_UNCACHED(PAGE_KERNEL);
#endif
/* All memory cached in PPC64LE; can't honor 'cached' input. */
ptr = vmap(pages, page_count, VM_MAP, prot);
if (ptr)
{
NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
}
return (NvUPtr)ptr;
}
static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
{
vunmap((void *)vaddr);
NV_MEMDBG_REMOVE((void *)vaddr, page_count * PAGE_SIZE);
}
#if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
#define NV_NUM_PHYSPAGES get_num_physpages()
#else
@@ -707,6 +640,47 @@ static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
#define NV_NUM_CPUS() num_possible_cpus()
#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
#if defined(NVCPU_X86_64) && \
NV_IS_EXPORT_SYMBOL_GPL_set_memory_encrypted && \
NV_IS_EXPORT_SYMBOL_GPL_set_memory_decrypted
#undef NV_HAVE_MEMORY_ENCRYPT_DECRYPT
#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 1
#endif
static inline void nv_set_memory_decrypted_zeroed(NvBool unencrypted,
unsigned long virt_addr,
int num_native_pages,
size_t size)
{
if (virt_addr == 0)
return;
#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
if (unencrypted)
{
set_memory_decrypted(virt_addr, num_native_pages);
memset((void *)virt_addr, 0, size);
}
#endif
}
static inline void nv_set_memory_encrypted(NvBool unencrypted,
unsigned long virt_addr,
int num_native_pages)
{
if (virt_addr == 0)
return;
#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
if (unencrypted)
{
set_memory_encrypted(virt_addr, num_native_pages);
}
#endif
}
static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
{
#if defined(NV_PHYS_TO_DMA_PRESENT)
@@ -887,94 +861,42 @@ typedef void irqreturn_t;
(((addr) >> NV_RM_PAGE_SHIFT) == \
(((addr) + (size) - 1) >> NV_RM_PAGE_SHIFT)))
/*
* The kernel may have a workaround for this, by providing a method to isolate
* a single 4K page in a given mapping.
*/
#if (PAGE_SIZE > NV_RM_PAGE_SIZE) && defined(NVCPU_PPC64LE) && defined(NV_PAGE_4K_PFN)
#define NV_4K_PAGE_ISOLATION_PRESENT
#define NV_4K_PAGE_ISOLATION_MMAP_ADDR(addr) \
((NvP64)((void*)(((addr) >> NV_RM_PAGE_SHIFT) << PAGE_SHIFT)))
#define NV_4K_PAGE_ISOLATION_MMAP_LEN(size) PAGE_SIZE
#define NV_4K_PAGE_ISOLATION_ACCESS_START(addr) \
((NvP64)((void*)((addr) & ~NV_RM_PAGE_MASK)))
#define NV_4K_PAGE_ISOLATION_ACCESS_LEN(addr, size) \
((((addr) & NV_RM_PAGE_MASK) + size + NV_RM_PAGE_MASK) & \
~NV_RM_PAGE_MASK)
#define NV_PROT_4K_PAGE_ISOLATION NV_PAGE_4K_PFN
#endif
static inline int nv_remap_page_range(struct vm_area_struct *vma,
unsigned long virt_addr, NvU64 phys_addr, NvU64 size, pgprot_t prot)
{
int ret = -1;
#if defined(NV_4K_PAGE_ISOLATION_PRESENT) && defined(NV_PROT_4K_PAGE_ISOLATION)
if ((size == PAGE_SIZE) &&
((pgprot_val(prot) & NV_PROT_4K_PAGE_ISOLATION) != 0))
{
/*
* remap_4k_pfn() hardcodes the length to a single OS page, and checks
* whether applying the page isolation workaround will cause PTE
* corruption (in which case it will fail, and this is an unsupported
* configuration).
*/
#if defined(NV_HASH__REMAP_4K_PFN_PRESENT)
ret = hash__remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
#else
ret = remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
#endif
}
else
#endif
{
ret = remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
return remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
prot);
}
return ret;
}
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
NvU64 phys_addr, NvU64 size, NvU64 start)
{
int ret = -1;
#if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
ret = nv_remap_page_range(vma, start, phys_addr, size,
nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
nv_adjust_pgprot(vma->vm_page_prot));
#else
ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
size, nv_adjust_pgprot(vma->vm_page_prot));
#endif
return ret;
}
static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
NvU64 virt_addr, NvU64 pfn, NvU32 extra_prot)
NvU64 virt_addr, NvU64 pfn)
{
/*
* vm_insert_pfn{,_prot} replaced with vmf_insert_pfn{,_prot} in Linux 4.20
*/
#if defined(NV_VMF_INSERT_PFN_PROT_PRESENT)
return vmf_insert_pfn_prot(vma, virt_addr, pfn,
__pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
__pgprot(pgprot_val(vma->vm_page_prot)));
#else
int ret = -EINVAL;
/*
* Only PPC64LE (NV_4K_PAGE_ISOLATION_PRESENT) requires extra_prot to be
* used when remapping.
*
* vm_insert_pfn_prot() was added in Linux 4.4, whereas POWER9 support
* was added in Linux 4.8.
*
* Rather than tampering with the vma to make use of extra_prot with
* vm_insert_pfn() on older kernels, for now, just fail in this case, as
* it's not expected to be used currently.
*/
#if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
__pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
#elif !defined(NV_4K_PAGE_ISOLATION_PRESENT)
__pgprot(pgprot_val(vma->vm_page_prot)));
#else
ret = vm_insert_pfn(vma, virt_addr, pfn);
#endif
switch (ret)
@@ -1160,11 +1082,6 @@ static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
typedef struct nvidia_pte_s {
NvU64 phys_addr;
unsigned long virt_addr;
NvU64 dma_addr;
#ifdef CONFIG_XEN
unsigned int guest_pfn;
#endif
unsigned int page_count;
} nvidia_pte_t;
#if defined(CONFIG_DMA_SHARED_BUFFER)
@@ -1205,6 +1122,7 @@ typedef struct nv_alloc_s {
NvS32 node_id; /* Node id for memory allocation when node is set in flags */
void *import_priv;
struct sg_table *import_sgt;
dma_addr_t dma_handle; /* dma handle used by dma_alloc_coherent(), dma_free_coherent() */
} nv_alloc_t;
/**
@@ -1430,6 +1348,23 @@ struct os_wait_queue {
struct completion q;
};
/*!
* @brief Mapping between clock names and clock handles.
*
* TEGRA_DISP_WHICH_CLK_MAX: maximum number of clocks
* defined in below enum.
*
* arch/nvalloc/unix/include/nv.h
* enum TEGRASOC_WHICH_CLK_MAX;
*
*/
typedef struct nvsoc_clks_s {
struct {
struct clk *handles;
const char *clkName;
} clk[TEGRASOC_WHICH_CLK_MAX];
} nvsoc_clks_t;
/*
* To report error in msi/msix when unhandled count reaches a threshold
*/
@@ -1589,6 +1524,8 @@ typedef struct nv_linux_state_s {
nv_acpi_t* nv_acpi_object;
#endif
nvsoc_clks_t soc_clk_handles;
/* Lock serializing ISRs for different SOC vectors */
nv_spinlock_t soc_isr_lock;
void *soc_bh_mutex;
@@ -1788,12 +1725,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
*/
static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
{
#if !defined(NVCPU_PPC64LE)
if (NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv))
{
return NV_ERR_GPU_IS_LOST;
}
#endif
return NV_OK;
}
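The repeated hunks in this file drop the "if (ptr)" guard before NV_MEMDBG_ADD. A minimal standalone sketch of an allocation-tracking wrapper whose bookkeeping helper tolerates NULL itself, which is the shape that change implies (purely illustrative plain C; this is not the driver's NV_MEMDBG implementation):

    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative bookkeeping helper: it checks for NULL internally, so
     * callers no longer need an "if (ptr)" guard around it. */
    static size_t tracked_bytes;

    static void memdbg_add(void *ptr, size_t size)
    {
        if (ptr == NULL)
            return;                 /* allocation failed: nothing to record */
        tracked_bytes += size;
    }

    /* Mirrors the shape of the NV_KMALLOC-style macros above. */
    #define TRACKED_ALLOC(ptr, size)      \
        {                                 \
            (ptr) = malloc(size);         \
            memdbg_add((ptr), (size));    \
        }

    int main(void)
    {
        char *buf;
        TRACKED_ALLOC(buf, 64);
        printf("tracked %zu bytes (buf %s)\n", tracked_bytes,
               buf ? "allocated" : "NULL");
        free(buf);
        return 0;
    }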

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -297,9 +297,21 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
#endif
}
#define NV_CAN_CALL_VMA_START_WRITE 1
#if !NV_CAN_CALL_VMA_START_WRITE
/*
* Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write().
*/
void nv_vma_start_write(struct vm_area_struct *);
#endif
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) |= flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
vm_flags_set(vma, flags);
#else
vma->vm_flags |= flags;
@@ -308,7 +320,10 @@ static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
vm_flags_clear(vma, flags);
#else
vma->vm_flags &= ~flags;

View File

@@ -26,8 +26,7 @@
#include "nv-linux.h"
#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64) || \
defined(NVCPU_PPC64LE)) && \
#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64)) && \
(defined(CONFIG_PCI_MSI) || defined(CONFIG_PCI_USE_VECTOR))
#define NV_LINUX_PCIE_MSI_SUPPORTED
#endif

View File

@@ -0,0 +1,36 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NV_PLATFORM_H
#define NV_PLATFORM_H
#include "nv-linux.h"
irqreturn_t nvidia_isr (int, void *);
irqreturn_t nvidia_isr_kthread_bh (int, void *);
#define NV_SUPPORTS_PLATFORM_DEVICE 0
#define NV_SUPPORTS_PLATFORM_DISPLAY_DEVICE 0
#endif

View File

@@ -41,7 +41,7 @@ void nv_procfs_remove_gpu (nv_linux_state_t *);
int nvidia_mmap (struct file *, struct vm_area_struct *);
int nvidia_mmap_helper (nv_state_t *, nv_linux_file_private_t *, nvidia_stack_t *, struct vm_area_struct *, void *);
int nv_encode_caching (pgprot_t *, NvU32, NvU32);
int nv_encode_caching (pgprot_t *, NvU32, nv_memory_type_t);
void nv_revoke_gpu_mappings_locked(nv_state_t *);
NvUPtr nv_vm_map_pages (struct page **, NvU32, NvBool, NvBool);

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2017-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -63,4 +63,13 @@ static inline void nv_timer_setup(struct nv_timer *nv_timer,
#endif
}
static inline void nv_timer_delete_sync(struct timer_list *timer)
{
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
timer_delete_sync(timer);
#else
del_timer_sync(timer);
#endif
}
#endif // __NV_TIMER_H__

View File

@@ -168,6 +168,15 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_PLLA_DISP,
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
TEGRASOC_WHICH_CLK_PLLA,
TEGRASOC_WHICH_CLK_EMC,
TEGRASOC_WHICH_CLK_GPU_FIRST,
TEGRASOC_WHICH_CLK_GPU_SYS = TEGRASOC_WHICH_CLK_GPU_FIRST,
TEGRASOC_WHICH_CLK_GPU_NVD,
TEGRASOC_WHICH_CLK_GPU_UPROC,
TEGRASOC_WHICH_CLK_GPU_GPC0,
TEGRASOC_WHICH_CLK_GPU_GPC1,
TEGRASOC_WHICH_CLK_GPU_GPC2,
TEGRASOC_WHICH_CLK_GPU_LAST = TEGRASOC_WHICH_CLK_GPU_GPC2,
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
} TEGRASOC_WHICH_CLK;
@@ -283,7 +292,6 @@ typedef struct nv_usermap_access_params_s
MemoryArea memArea;
NvU64 access_start;
NvU64 access_size;
NvU64 remap_prot_extra;
NvBool contig;
NvU32 caching;
} nv_usermap_access_params_t;
@@ -299,7 +307,6 @@ typedef struct nv_alloc_mapping_context_s {
MemoryArea memArea;
NvU64 access_start;
NvU64 access_size;
NvU64 remap_prot_extra;
NvU32 prot;
NvBool valid;
NvU32 caching;
@@ -498,6 +505,9 @@ typedef struct nv_state_t
NvU32 dispIsoStreamId;
NvU32 dispNisoStreamId;
} iommus;
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@@ -542,9 +552,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
@@ -564,24 +574,24 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
* flags
*/
#define NV_FLAG_OPEN 0x0001
#define NV_FLAG_EXCLUDE 0x0002
#define NV_FLAG_CONTROL 0x0004
// Unused 0x0008
#define NV_FLAG_SOC_DISPLAY 0x0010
#define NV_FLAG_USES_MSI 0x0020
#define NV_FLAG_USES_MSIX 0x0040
#define NV_FLAG_PASSTHRU 0x0080
#define NV_FLAG_SUSPENDED 0x0100
#define NV_FLAG_SOC_IGPU 0x0200
#define NV_FLAG_OPEN 0x0001
#define NV_FLAG_EXCLUDE 0x0002
#define NV_FLAG_CONTROL 0x0004
// Unused 0x0008
#define NV_FLAG_SOC_DISPLAY 0x0010
#define NV_FLAG_USES_MSI 0x0020
#define NV_FLAG_USES_MSIX 0x0040
#define NV_FLAG_PASSTHRU 0x0080
#define NV_FLAG_SUSPENDED 0x0100
#define NV_FLAG_SOC_IGPU 0x0200
/* To be set when an FLR needs to be triggered after device shut down. */
#define NV_FLAG_TRIGGER_FLR 0x0400
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
#define NV_FLAG_IN_RECOVERY 0x1000
// Unused 0x2000
#define NV_FLAG_UNBIND_LOCK 0x4000
#define NV_FLAG_TRIGGER_FLR 0x0400
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
#define NV_FLAG_IN_RECOVERY 0x1000
#define NV_FLAG_PCI_REMOVE_IN_PROGRESS 0x2000
#define NV_FLAG_UNBIND_LOCK 0x4000
/* To be set when GPU is not present on the bus, to help device teardown */
#define NV_FLAG_IN_SURPRISE_REMOVAL 0x8000
#define NV_FLAG_IN_SURPRISE_REMOVAL 0x8000
typedef enum
{
@@ -795,7 +805,7 @@ NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, Nv
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **, NvBool);
void NV_API_CALL nv_unregister_user_pages (nv_state_t *, NvU64, void **, void **);
NV_STATUS NV_API_CALL nv_register_peer_io_mem (nv_state_t *, NvU64 *, NvU64, void **);
@@ -915,6 +925,15 @@ NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);
NV_STATUS NV_API_CALL nv_clk_get_handles (nv_state_t *);
void NV_API_CALL nv_clk_clear_handles (nv_state_t *);
NV_STATUS NV_API_CALL nv_enable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
void NV_API_CALL nv_disable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
NV_STATUS NV_API_CALL nv_get_curr_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
NV_STATUS NV_API_CALL nv_get_max_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
NV_STATUS NV_API_CALL nv_get_min_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
NV_STATUS NV_API_CALL nv_set_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32);
/*
* ---------------------------------------------------------------------------
*
@@ -1040,6 +1059,9 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
NvBool NV_API_CALL rm_is_altstack_in_use(void);
void NV_API_CALL rm_notify_gpu_addition(nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_notify_gpu_removal(nvidia_stack_t *, nv_state_t *);
/* vGPU VFIO specific functions */
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
NvU32 *, NvU32 *, NvU32);
@@ -1054,7 +1076,7 @@ NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *,
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
NV_STATUS NV_API_CALL nv_check_usermap_access_params(nv_state_t*, const nv_usermap_access_params_t*);
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);

View File

@@ -0,0 +1,120 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NV_COMMON_UTILS_H__
#define __NV_COMMON_UTILS_H__
#include "nvtypes.h"
#include "nvmisc.h"
#if !defined(TRUE)
#define TRUE NV_TRUE
#endif
#if !defined(FALSE)
#define FALSE NV_FALSE
#endif
#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)
/* Get the length of a statically-sized array. */
#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))
#define NV_INVALID_HEAD 0xFFFFFFFF
#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)
#if !defined(NV_MIN)
# define NV_MIN(a,b) (((a)<(b))?(a):(b))
#endif
#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)
#if !defined(NV_MAX)
# define NV_MAX(a,b) (((a)>(b))?(a):(b))
#endif
#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)
static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
{
if (val < min) {
return min;
}
if (val > max) {
return max;
}
return val;
}
#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))
/*
* Macros used for computing palette entries:
*
* NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
* of source_size bits into a value of result_size bits by shifting
* the source value into the high bits and replicating the high bits
* of the value into the low bits of the result.
*
* PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
* that has w bits to an appropriate entry in a LUT of 256 entries.
*/
static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
int source_size,
int result_size)
{
return (val << (result_size - source_size)) |
(val >> ((source_size << 1) - result_size));
}
static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
{
return NV_UNDER_REPLICATE(val, depth, 8);
}
/*
* Use __builtin_ffs where it is supported, or provide an equivalent
* implementation for platforms like riscv where it is not.
*/
#if defined(__GNUC__) && !NVCPU_IS_RISCV64
static inline int nv_ffs(int x)
{
return __builtin_ffs(x);
}
#else
static inline int nv_ffs(int x)
{
if (x == 0)
return 0;
LOWESTBITIDX_32(x);
return 1 + x;
}
#endif
#endif /* __NV_COMMON_UTILS_H__ */
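The palette comment above is easier to follow with concrete numbers. A minimal standalone sketch of the same bit-replication arithmetic (plain C; the local function mirrors NV_UNDER_REPLICATE from the header, and the main() harness is illustrative only):

    #include <stdio.h>

    /* Mirror of NV_UNDER_REPLICATE(): widen a source_size-bit value to
     * result_size bits by shifting it into the high bits and replicating
     * its high bits into the low bits. */
    static unsigned int under_replicate(unsigned short val,
                                        int source_size,
                                        int result_size)
    {
        return (val << (result_size - source_size)) |
               (val >> ((source_size << 1) - result_size));
    }

    int main(void)
    {
        /* A 5-bit color component mapped to an 8-bit LUT entry, as
         * PALETTE_DEPTH_SHIFT(val, 5) would do:
         *   0x1F -> 0xFF (full scale stays full scale)
         *   0x10 -> 0x84 (high bits replicated into the low bits) */
        printf("0x1F -> 0x%02X\n", under_replicate(0x1F, 5, 8));
        printf("0x10 -> 0x%02X\n", under_replicate(0x10, 5, 8));
        return 0;
    }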

View File

@@ -0,0 +1,370 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2010-2014 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* This header file defines the types NVDpyId and NVDpyIdList, as well
* as inline functions to manipulate these types. NVDpyId and
* NVDpyIdList should be treated as opaque by includers of this header
* file.
*/
#ifndef __NV_DPY_ID_H__
#define __NV_DPY_ID_H__
#include "nvtypes.h"
#include "nvmisc.h"
#include "nv_common_utils.h"
#include <nvlimits.h> /* NV_MAX_SUBDEVICES */
typedef struct {
NvU32 opaqueDpyId;
} NVDpyId;
typedef struct {
NvU32 opaqueDpyIdList;
} NVDpyIdList;
#define NV_DPY_ID_MAX_SUBDEVICES NV_MAX_SUBDEVICES
#define NV_DPY_ID_MAX_DPYS_IN_LIST 32
/*
* For use in combination with nvDpyIdToPrintFormat(); e.g.,
*
* printf("dpy id: " NV_DPY_ID_PRINT_FORMAT "\n",
* nvDpyIdToPrintFormat(dpyId));
*
* The includer should not make assumptions about the return type of
* nvDpyIdToPrintFormat().
*/
#define NV_DPY_ID_PRINT_FORMAT "0x%08x"
/* functions to return an invalid DpyId and empty DpyIdList */
static inline NVDpyId nvInvalidDpyId(void)
{
NVDpyId dpyId = { 0 };
return dpyId;
}
static inline NVDpyIdList nvEmptyDpyIdList(void)
{
NVDpyIdList dpyIdList = { 0 };
return dpyIdList;
}
static inline NVDpyIdList nvAllDpyIdList(void)
{
NVDpyIdList dpyIdList = { ~0U };
return dpyIdList;
}
static inline void
nvEmptyDpyIdListSubDeviceArray(NVDpyIdList dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
{
int dispIndex;
for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
dpyIdList[dispIndex] = nvEmptyDpyIdList();
}
}
/* set operations on DpyIds and DpyIdLists: Add, Subtract, Intersect, Xor */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvAddDpyIdToDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList |
dpyId.opaqueDpyId;
return tmpDpyIdList;
}
/* Passing an invalid display ID makes this function return an empty list. */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvAddDpyIdToEmptyDpyIdList(NVDpyId dpyId)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyId.opaqueDpyId;
return tmpDpyIdList;
}
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvAddDpyIdListToDpyIdList(NVDpyIdList dpyIdListA,
NVDpyIdList dpyIdListB)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdListB.opaqueDpyIdList |
dpyIdListA.opaqueDpyIdList;
return tmpDpyIdList;
}
/* Returns: dpyIdList - dpyId */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvDpyIdListMinusDpyId(NVDpyIdList dpyIdList, NVDpyId dpyId)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
(~dpyId.opaqueDpyId);
return tmpDpyIdList;
}
/* Returns: dpyIdListA - dpyIdListB */
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvDpyIdListMinusDpyIdList(NVDpyIdList dpyIdListA,
NVDpyIdList dpyIdListB)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
(~dpyIdListB.opaqueDpyIdList);
return tmpDpyIdList;
}
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvIntersectDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
dpyId.opaqueDpyId;
return tmpDpyIdList;
}
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvIntersectDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
NVDpyIdList dpyIdListB)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
dpyIdListB.opaqueDpyIdList;
return tmpDpyIdList;
}
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvXorDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList ^
dpyId.opaqueDpyId;
return tmpDpyIdList;
}
static inline __attribute__ ((warn_unused_result))
NVDpyIdList nvXorDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
NVDpyIdList dpyIdListB)
{
NVDpyIdList tmpDpyIdList;
tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList ^
dpyIdListB.opaqueDpyIdList;
return tmpDpyIdList;
}
/* boolean checks */
static inline NvBool nvDpyIdIsInDpyIdList(NVDpyId dpyId,
NVDpyIdList dpyIdList)
{
return !!(dpyIdList.opaqueDpyIdList & dpyId.opaqueDpyId);
}
static inline NvBool nvDpyIdIsInvalid(NVDpyId dpyId)
{
return (dpyId.opaqueDpyId == 0);
}
static inline NvBool nvDpyIdListIsEmpty(NVDpyIdList dpyIdList)
{
return (dpyIdList.opaqueDpyIdList == 0);
}
static inline NvBool
nvDpyIdListSubDeviceArrayIsEmpty(NVDpyIdList
dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
{
int dispIndex;
for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
if (!nvDpyIdListIsEmpty(dpyIdList[dispIndex])) {
return NV_FALSE;
}
}
return NV_TRUE;
}
static inline NvBool nvDpyIdsAreEqual(NVDpyId dpyIdA, NVDpyId dpyIdB)
{
return (dpyIdA.opaqueDpyId == dpyIdB.opaqueDpyId);
}
static inline NvBool nvDpyIdListsAreEqual(NVDpyIdList dpyIdListA,
NVDpyIdList dpyIdListB)
{
return (dpyIdListA.opaqueDpyIdList == dpyIdListB.opaqueDpyIdList);
}
static inline NvBool nvDpyIdListIsASubSetofDpyIdList(NVDpyIdList dpyIdListA,
NVDpyIdList dpyIdListB)
{
NVDpyIdList intersectedDpyIdList =
nvIntersectDpyIdListAndDpyIdList(dpyIdListA, dpyIdListB);
return nvDpyIdListsAreEqual(intersectedDpyIdList, dpyIdListA);
}
/*
* retrieve the individual dpyIds from dpyIdList; if dpyId is invalid,
* start at the beginning of the list; otherwise, start at the dpyId
* after the specified dpyId
*/
static inline __attribute__ ((warn_unused_result))
NVDpyId nvNextDpyIdInDpyIdListUnsorted(NVDpyId dpyId, NVDpyIdList dpyIdList)
{
if (nvDpyIdIsInvalid(dpyId)) {
dpyId.opaqueDpyId = 1;
} else {
dpyId.opaqueDpyId <<= 1;
}
while (dpyId.opaqueDpyId) {
if (nvDpyIdIsInDpyIdList(dpyId, dpyIdList)) {
return dpyId;
}
dpyId.opaqueDpyId <<= 1;
}
/* no dpyIds left in dpyIdlist; return the invalid dpyId */
return nvInvalidDpyId();
}
#define FOR_ALL_DPY_IDS(_dpyId, _dpyIdList) \
for ((_dpyId) = nvNextDpyIdInDpyIdListUnsorted(nvInvalidDpyId(), \
(_dpyIdList)); \
!nvDpyIdIsInvalid(_dpyId); \
(_dpyId) = nvNextDpyIdInDpyIdListUnsorted((_dpyId), \
(_dpyIdList)))
/* report how many dpyIds are in the dpyIdList */
static inline int nvCountDpyIdsInDpyIdList(NVDpyIdList dpyIdList)
{
return nvPopCount32(dpyIdList.opaqueDpyIdList);
}
static inline int
nvCountDpyIdsInDpyIdListSubDeviceArray(NVDpyIdList
dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
{
int dispIndex, n = 0;
for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
n += nvCountDpyIdsInDpyIdList(dpyIdList[dispIndex]);
}
return n;
}
/* convert between dpyId/dpyIdList and NV-CONTROL values */
static inline int nvDpyIdToNvControlVal(NVDpyId dpyId)
{
return (int) dpyId.opaqueDpyId;
}
static inline int nvDpyIdListToNvControlVal(NVDpyIdList dpyIdList)
{
return (int) dpyIdList.opaqueDpyIdList;
}
static inline NVDpyId nvNvControlValToDpyId(int val)
{
NVDpyId dpyId;
dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
return dpyId;
}
static inline NVDpyIdList nvNvControlValToDpyIdList(int val)
{
NVDpyIdList dpyIdList;
dpyIdList.opaqueDpyIdList = val;
return dpyIdList;
}
/* convert between dpyId and NvU32 */
static inline NVDpyId nvNvU32ToDpyId(NvU32 val)
{
NVDpyId dpyId;
dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
return dpyId;
}
static inline NVDpyIdList nvNvU32ToDpyIdList(NvU32 val)
{
NVDpyIdList dpyIdList;
dpyIdList.opaqueDpyIdList = val;
return dpyIdList;
}
static inline NvU32 nvDpyIdToNvU32(NVDpyId dpyId)
{
return dpyId.opaqueDpyId;
}
static inline NvU32 nvDpyIdListToNvU32(NVDpyIdList dpyIdList)
{
return dpyIdList.opaqueDpyIdList;
}
/* Return the bit position of dpyId: a number in the range [0..31]. */
static inline NvU32 nvDpyIdToIndex(NVDpyId dpyId)
{
return nv_ffs(dpyId.opaqueDpyId) - 1;
}
/* Return a display ID that is not in the list passed in. */
static inline NVDpyId nvNewDpyId(NVDpyIdList excludeList)
{
NVDpyId dpyId;
if (~excludeList.opaqueDpyIdList == 0) {
return nvInvalidDpyId();
}
dpyId.opaqueDpyId =
1U << (nv_ffs(~excludeList.opaqueDpyIdList) - 1);
return dpyId;
}
/* See comment for NV_DPY_ID_PRINT_FORMAT. */
static inline NvU32 nvDpyIdToPrintFormat(NVDpyId dpyId)
{
return nvDpyIdToNvU32(dpyId);
}
/* Prevent usage of opaque values. */
#define opaqueDpyId __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
#define opaqueDpyIdList __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
#endif /* __NV_DPY_ID_H__ */
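All of the DpyId helpers above are thin wrappers around one 32-bit bitmask: a display ID is a single set bit, a list is the OR of such bits, and iteration walks the set bits from lowest to highest. A standalone sketch of that pattern (plain C with stdint.h types standing in for NvU32, and GCC/Clang builtins standing in for nv_ffs/nvPopCount32; the local names are not the header's types):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t dpy_id_t;       /* stand-in for NVDpyId     */
    typedef uint32_t dpy_id_list_t;  /* stand-in for NVDpyIdList */

    /* Allocate an ID not present in 'exclude' (cf. nvNewDpyId()):
     * pick the lowest clear bit, or return 0 (invalid) if the list is full. */
    static dpy_id_t new_dpy_id(dpy_id_list_t exclude)
    {
        if (~exclude == 0)
            return 0;
        return 1u << __builtin_ctz(~exclude);
    }

    int main(void)
    {
        dpy_id_list_t list = 0;            /* nvEmptyDpyIdList()       */
        dpy_id_t a = new_dpy_id(list);     /* 0x1                      */
        list |= a;                         /* nvAddDpyIdToDpyIdList()  */
        dpy_id_t b = new_dpy_id(list);     /* 0x2                      */
        list |= b;

        /* FOR_ALL_DPY_IDS(): visit each set bit from lowest to highest. */
        for (dpy_id_list_t rest = list; rest != 0; rest &= rest - 1)
            printf("dpy id: 0x%08x\n", (unsigned)(rest & -rest));

        /* nvCountDpyIdsInDpyIdList() is a popcount of the mask. */
        printf("count: %d\n", __builtin_popcount(list));
        return 0;
    }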

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -20,8 +20,8 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NV_SMG_H__
#define __NV_SMG_H__
#ifndef __NV_MIG_TYPES_H__
#define __NV_MIG_TYPES_H__
#ifdef __cplusplus
extern "C" {
@@ -29,25 +29,12 @@ extern "C" {
#include "nvtypes.h"
/*
* The simplest required abstraction for accessing RM independent of the
* calling component which may be a kernel module or userspace driver.
*/
typedef NvU32 (*NVSubdevSMGRMControl) (void *ctx, NvU32 object, NvU32 cmd, void *params, NvU32 paramsSize);
typedef NvU32 (*NVSubdevSMGRMAlloc) (void *ctx, NvU32 parent, NvU32 object, NvU32 cls, void *allocParams);
typedef NvU32 (*NVSubdevSMGRMFree) (void *ctx, NvU32 parent, NvU32 object);
typedef NvU32 MIGDeviceId;
NvBool NVSubdevSMGSetPartition(void *ctx,
NvU32 subdevHandle,
const char *computeInstUuid,
NvU32 gpuInstSubscriptionHdl,
NvU32 computeInstSubscriptionHdl,
NVSubdevSMGRMControl rmControl,
NVSubdevSMGRMAlloc rmAlloc,
NVSubdevSMGRMFree rmFree);
#define NO_MIG_DEVICE 0L
#ifdef __cplusplus
}
#endif
#endif /* __NV_SMG_H__ */
#endif /* __NV_MIG_TYPES_H__ */

View File

@@ -660,14 +660,20 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
RM will propagate the update to all channels using the provided VA space.
All channels must be idle when this call is made.
If the pageDirectory is in system memory then a CPU physical address must be
provided. RM will establish and manage the DMA mapping for the
pageDirectory.
Arguments:
vaSpace[IN] - VASpace Object
physAddress[IN] - Physical address of new page directory
physAddress[IN] - Physical address of new page directory. If
!bVidMemAperture this is a CPU physical address.
numEntries[IN] - Number of entries including previous PDE which will be copied
bVidMemAperture[IN] - If set pageDirectory will reside in VidMem aperture else sysmem
pasid[IN] - PASID (Process Address Space IDentifier) of the process
corresponding to the VA space. Ignored unless the VA space
object has ATS enabled.
dmaAddress[OUT] - DMA mapping created for physAddress.
Error codes:
NV_ERR_GENERIC
@@ -675,7 +681,8 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
*/
NV_STATUS nvUvmInterfaceSetPageDirectory(uvmGpuAddressSpaceHandle vaSpace,
NvU64 physAddress, unsigned numEntries,
NvBool bVidMemAperture, NvU32 pasid);
NvBool bVidMemAperture, NvU32 pasid,
NvU64 *dmaAddress);
/*******************************************************************************
nvUvmInterfaceUnsetPageDirectory
@@ -1056,7 +1063,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
*/
NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo,
UvmGpuAccessCntrConfig *pAccessCntrConfig);
const UvmGpuAccessCntrConfig *pAccessCntrConfig);
/*******************************************************************************
nvUvmInterfaceDisableAccessCntr
@@ -1862,5 +1869,4 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU32 bufferSize);
#endif // _NV_UVM_INTERFACE_H_

View File

@@ -268,6 +268,7 @@ typedef struct UvmGpuChannelInfo_tag
// The errorNotifier is filled out when the channel hits an RC error.
NvNotification *errorNotifier;
NvNotification *keyRotationNotifier;
NvU32 hwRunlistId;
@@ -297,6 +298,7 @@ typedef struct UvmGpuChannelInfo_tag
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
@@ -374,6 +376,9 @@ typedef struct
// True if the CE can be used for P2P transactions
NvBool p2p:1;
// True if the CE supports encryption
NvBool secure:1;
// Mask of physical CEs assigned to this LCE
//
// The value returned by RM for this field may change when a GPU is
@@ -1007,17 +1012,17 @@ typedef struct UvmGpuFaultInfo_tag
NvU32 replayableFaultMask;
// Fault buffer CPU mapping
void* bufferAddress;
//
// When Confidential Computing is disabled, the mapping points to the
// actual HW fault buffer.
//
// When Confidential Computing is enabled, the mapping points to a
// copy of the HW fault buffer. This "shadow buffer" is maintained
// by GSP-RM.
void* bufferAddress;
// Size, in bytes, of the fault buffer pointed by bufferAddress.
NvU32 bufferSize;
// Mapping pointing to the start of the fault buffer metadata containing
// a 16Byte authentication tag and a valid byte. Always NULL when
// Confidential Computing is disabled.
@@ -1103,24 +1108,9 @@ typedef enum
UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
} UVM_ACCESS_COUNTER_GRANULARITY;
typedef enum
{
UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
UVM_ACCESS_COUNTER_USE_LIMIT_QTR = 2,
UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
} UVM_ACCESS_COUNTER_USE_LIMIT;
typedef struct UvmGpuAccessCntrConfig_tag
{
NvU32 mimcGranularity;
NvU32 momcGranularity;
NvU32 mimcUseLimit;
NvU32 momcUseLimit;
NvU32 granularity;
NvU32 threshold;
} UvmGpuAccessCntrConfig;

View File

@@ -0,0 +1,37 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NV_I2C_H_
#define _NV_I2C_H_
#define NV_I2C_MSG_WR 0x0000
#define NV_I2C_MSG_RD 0x0001
typedef struct nv_i2c_msg_s
{
NvU16 addr;
NvU16 flags;
NvU16 len;
NvU8* buf;
} nv_i2c_msg_t;
#endif
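The nv_i2c_msg_t structure above carries one message of an I2C transaction: a slave address, a read/write flag, and a buffer. A hedged standalone sketch of the common two-message register read built from such messages (the typedefs are stand-ins for the NVIDIA fixed-width types, the addresses are made up, and nothing here performs a real I2C transfer):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint8_t  NvU8;    /* stand-ins for the NVIDIA fixed-width types */
    typedef uint16_t NvU16;

    #define NV_I2C_MSG_WR 0x0000
    #define NV_I2C_MSG_RD 0x0001

    typedef struct nv_i2c_msg_s
    {
        NvU16 addr;    /* slave address                  */
        NvU16 flags;   /* NV_I2C_MSG_WR / NV_I2C_MSG_RD  */
        NvU16 len;     /* buffer length in bytes         */
        NvU8 *buf;
    } nv_i2c_msg_t;

    int main(void)
    {
        NvU8 reg = 0x10;            /* hypothetical register offset */
        NvU8 data[2] = { 0, 0 };

        /* Write the register offset, then read two bytes back. */
        nv_i2c_msg_t xfer[2] = {
            { .addr = 0x50, .flags = NV_I2C_MSG_WR, .len = 1, .buf = &reg },
            { .addr = 0x50, .flags = NV_I2C_MSG_RD, .len = 2, .buf = data },
        };

        for (int i = 0; i < 2; i++)
            printf("msg %d: addr=0x%02x flags=0x%04x len=%u\n", i,
                   (unsigned)xfer[i].addr, (unsigned)xfer[i].flags,
                   (unsigned)xfer[i].len);
        return 0;
    }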

View File

@@ -0,0 +1,96 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/******************************************************************************\
* *
* Description: *
* Accommodates sharing of IMP-related structures between kernel interface *
* files and core RM. *
* *
\******************************************************************************/
#pragma once
#include <nvtypes.h>
#if defined(_MSC_VER)
#pragma warning(disable:4324)
#endif
//
// This file was generated with FINN, an NVIDIA coding tool.
// Source file: nvimpshared.finn
//
//
// There are only a small number of discrete dramclk frequencies available on
// the system. This structure contains IMP-relevant information associated
// with a specific dramclk frequency.
//
typedef struct DRAM_CLK_INSTANCE {
NvU32 dram_clk_freq_khz;
NvU32 mchub_clk_khz;
NvU32 mc_clk_khz;
NvU32 max_iso_bw_kbps;
//
// switch_latency_ns is the maximum time required to switch the dramclk
// frequency to the frequency specified in dram_clk_freq_khz.
//
NvU32 switch_latency_ns;
} DRAM_CLK_INSTANCE;
//
// This table is used to collect information from other modules that is needed
// for RM IMP calculations. (Used on Tegra only.)
//
typedef struct TEGRA_IMP_IMPORT_DATA {
//
// max_iso_bw_kbps stores the maximum possible ISO bandwidth available to
// display, assuming display is the only active ISO client. (Note that ISO
// bandwidth will typically be allocated to multiple clients, so display
// will generally not have access to the maximum possible bandwidth.)
//
NvU32 max_iso_bw_kbps;
// On Orin, each dram channel is 16 bits wide.
NvU32 num_dram_channels;
//
// dram_clk_instance stores entries for all possible dramclk frequencies,
// sorted by dramclk frequency in increasing order.
//
// "24" is expected to be larger than the actual number of required entries
// (which is provided by a BPMP API), but it can be increased if necessary.
//
// num_dram_clk_entries is filled in with the actual number of distinct
// dramclk entries.
//
NvU32 num_dram_clk_entries;
DRAM_CLK_INSTANCE dram_clk_instance[24];
} TEGRA_IMP_IMPORT_DATA;
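The comments above say dram_clk_instance is sorted by dramclk frequency in increasing order and that num_dram_clk_entries holds the populated count. Purely as an illustration (this usage is not taken from the driver), one way such a table could be consumed is a linear scan for the lowest frequency whose ISO bandwidth meets a requirement:

    #include <stdint.h>
    #include <stdio.h>

    /* Trimmed, illustrative stand-in for DRAM_CLK_INSTANCE. */
    typedef struct {
        uint32_t dram_clk_freq_khz;
        uint32_t max_iso_bw_kbps;
    } dram_clk_instance_t;

    /* Return the index of the lowest dramclk whose ISO bandwidth covers
     * need_kbps, relying on the table being sorted by increasing frequency.
     * Returns -1 if even the highest entry is insufficient. */
    static int pick_dram_clk(const dram_clk_instance_t *tbl,
                             uint32_t num_entries, uint32_t need_kbps)
    {
        for (uint32_t i = 0; i < num_entries; i++) {
            if (tbl[i].max_iso_bw_kbps >= need_kbps)
                return (int)i;
        }
        return -1;
    }

    int main(void)
    {
        /* Made-up frequencies and bandwidths, in increasing order. */
        const dram_clk_instance_t tbl[] = {
            {  204000,  1000000 },
            {  665600,  4000000 },
            { 3199000, 16000000 },
        };
        int i = pick_dram_clk(tbl, 3, 3500000);
        if (i >= 0)
            printf("lowest sufficient dramclk: %u kHz\n",
                   (unsigned)tbl[i].dram_clk_freq_khz);
        return 0;
    }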

View File

@@ -640,22 +640,28 @@ enum NvKmsInputColorRange {
* If DEFAULT is provided, driver will assume full range for RGB formats
* and limited range for YUV formats.
*/
NVKMS_INPUT_COLORRANGE_DEFAULT = 0,
NVKMS_INPUT_COLOR_RANGE_DEFAULT = 0,
NVKMS_INPUT_COLORRANGE_LIMITED = 1,
NVKMS_INPUT_COLOR_RANGE_LIMITED = 1,
NVKMS_INPUT_COLORRANGE_FULL = 2,
NVKMS_INPUT_COLOR_RANGE_FULL = 2,
};
enum NvKmsInputColorSpace {
/* Unknown colorspace; no de-gamma will be applied */
NVKMS_INPUT_COLORSPACE_NONE = 0,
/* Unknown colorspace */
NVKMS_INPUT_COLOR_SPACE_NONE = 0,
/* Linear, Rec.709 [-0.5, 7.5) */
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
NVKMS_INPUT_COLOR_SPACE_BT601 = 1,
NVKMS_INPUT_COLOR_SPACE_BT709 = 2,
NVKMS_INPUT_COLOR_SPACE_BT2020 = 3,
NVKMS_INPUT_COLOR_SPACE_BT2100 = NVKMS_INPUT_COLOR_SPACE_BT2020,
/* PQ, Rec.2020 unity */
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
NVKMS_INPUT_COLOR_SPACE_SCRGB = 4
};
enum NvKmsInputTf {
NVKMS_INPUT_TF_LINEAR = 0,
NVKMS_INPUT_TF_PQ = 1
};
enum NvKmsOutputColorimetry {

View File

@@ -24,8 +24,10 @@
#if !defined(__NVKMS_KAPI_H__)
#include "nvtypes.h"
#include "nv_mig_types.h"
#include "nv-gpu-info.h"
#include "nv_dpy_id.h"
#include "nvkms-api-types.h"
#include "nvkms-format.h"
@@ -173,12 +175,18 @@ struct NvKmsKapiDeviceResourcesInfo {
NvBool supportsSyncpts;
NvBool requiresVrrSemaphores;
NvBool supportsInputColorRange;
NvBool supportsInputColorSpace;
} caps;
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];
struct NvKmsKapiLutCaps lutCaps;
NvU64 vtFbBaseAddress;
NvU64 vtFbSize;
};
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -204,6 +212,7 @@ struct NvKmsKapiConnectorInfo {
NvU32 numIncompatibleConnectors;
NvKmsKapiConnector incompatibleConnectorHandles[NVKMS_KAPI_MAX_CONNECTORS];
NVDpyIdList dynamicDpyIdList;
};
struct NvKmsKapiStaticDisplayInfo {
@@ -222,6 +231,8 @@ struct NvKmsKapiStaticDisplayInfo {
NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
NvU32 headMask;
NvBool isDpMST;
};
struct NvKmsKapiSyncParams {
@@ -260,7 +271,8 @@ struct NvKmsKapiLayerConfig {
NvBool enabled;
} hdrMetadata;
enum NvKmsOutputTf tf;
enum NvKmsInputTf inputTf;
enum NvKmsOutputTf outputTf;
NvU8 minPresentInterval;
NvBool tearing;
@@ -272,6 +284,7 @@ struct NvKmsKapiLayerConfig {
NvU16 dstWidth, dstHeight;
enum NvKmsInputColorSpace inputColorSpace;
enum NvKmsInputColorRange inputColorRange;
struct {
NvBool enabled;
@@ -315,7 +328,10 @@ struct NvKmsKapiLayerRequestedConfig {
NvBool dstXYChanged : 1;
NvBool dstWHChanged : 1;
NvBool cscChanged : 1;
NvBool tfChanged : 1;
NvBool inputTfChanged : 1;
NvBool outputTfChanged : 1;
NvBool inputColorSpaceChanged : 1;
NvBool inputColorRangeChanged : 1;
NvBool hdrMetadataChanged : 1;
NvBool matrixOverridesChanged : 1;
NvBool ilutChanged : 1;
@@ -481,6 +497,8 @@ struct NvKmsKapiEvent {
struct NvKmsKapiAllocateDeviceParams {
/* [IN] GPU ID obtained from enumerateGpus() */
NvU32 gpuId;
/* [IN] MIG device if requested */
MIGDeviceId migDevice;
/* [IN] Private data of device allocator */
void *privateData;
@@ -563,6 +581,11 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {
typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);
struct NvKmsKapiGpuInfo {
nv_gpu_info_t gpuInfo;
MIGDeviceId migDevice;
};
struct NvKmsKapiFunctionsTable {
/*!
@@ -586,7 +609,7 @@ struct NvKmsKapiFunctionsTable {
*
* \return Count of enumerated gpus.
*/
NvU32 (*enumerateGpus)(nv_gpu_info_t *gpuInfo);
NvU32 (*enumerateGpus)(struct NvKmsKapiGpuInfo *kapiGpuInfo);
/*!
* Allocate an NVK device using which you can query/allocate resources on
@@ -1559,6 +1582,26 @@ struct NvKmsKapiFunctionsTable {
NvS32 index
);
/*!
* Check or wait on a head's LUT notifier.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] head The head to check for LUT completion.
*
* \param [in] waitForCompletion If true, wait for the notifier in NvKms
* before returning.
*
* \param [out] complete Returns whether the notifier has completed.
*/
NvBool
(*checkLutNotifier)
(
struct NvKmsKapiDevice *device,
NvU32 head,
NvBool waitForCompletion
);
/*
* Notify NVKMS that the system's framebuffer console has been disabled and
* the reserved allocation for the old framebuffer console can be unmapped.

View File

@@ -701,11 +701,6 @@ nvPrevPow2_U64(const NvU64 x )
} \
}
//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
* Returns the position of nth set bit in the given mask.
*
@@ -735,8 +730,6 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
return -1;
}
#endif // NVDEC_1_0
//
// Size to use when declaring variable-sized arrays
//
@@ -780,12 +773,15 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
// Returns the offset (in bytes) of 'member' in struct 'type'.
#ifndef NV_OFFSETOF
#if defined(__GNUC__) && (__GNUC__ > 3)
#define NV_OFFSETOF(type, member) ((NvU32)__builtin_offsetof(type, member))
#define NV_OFFSETOF(type, member) ((NvUPtr) __builtin_offsetof(type, member))
#else
#define NV_OFFSETOF(type, member) ((NvU32)(NvU64)&(((type *)0)->member)) // shouldn't we use PtrToUlong? But will need to include windows header.
#define NV_OFFSETOF(type, member) ((NvUPtr) &(((type *)0)->member))
#endif
#endif
// Given a pointer and the member it is of the parent struct, return a pointer to the parent struct
#define NV_CONTAINEROF(ptr, type, member) ((type *) (((NvUPtr) ptr) - NV_OFFSETOF(type, member)))
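An illustrative use of the two macros above (the struct and helper below are invented for this sketch): NV_CONTAINEROF subtracts NV_OFFSETOF(type, member) from the member pointer to recover the enclosing object, mirroring the kernel's container_of().

struct example_link { struct example_link *next; };

struct example_node {
    NvU32 key;
    struct example_link link;
};

static inline struct example_node *example_node_from_link(struct example_link *l)
{
    /* Equivalent to (struct example_node *)((NvUPtr)l - NV_OFFSETOF(struct example_node, link)). */
    return NV_CONTAINEROF(l, struct example_node, link);
}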
//
// Performs a rounded division of b into a (unsigned). For SIGNED version of
// NV_ROUNDED_DIV() macro check the comments in bug 769777.

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -159,6 +159,11 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_THRESHOLD_CROSSED, 0x00000085, "A fatal threshold has been crossed")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR, 0x00000086, "An error occurred while trying to retire a resource")
NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC, 0x00000087, "NVLink fabric state cached by the driver is out of sync")
NV_STATUS_CODE(NV_ERR_BUFFER_FULL, 0x00000088, "Buffer is full")
NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY, 0x00000089, "Buffer is empty")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
@@ -169,5 +174,6 @@ NV_STATUS_CODE(NV_WARN_MORE_PROCESSING_REQUIRED, 0x00010005, "WARNING More
NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Nothing to do")
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
NV_STATUS_CODE(NV_WARN_THRESHOLD_CROSSED, 0x00010009, "WARNING Threshold has been crossed")
#endif /* SDK_NVSTATUSCODES_H */

View File

@@ -229,6 +229,7 @@ extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_sme_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;

View File

@@ -0,0 +1,387 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _OS_DSI_PANEL_PARAMS_H_
#define _OS_DSI_PANEL_PARAMS_H_
#define DSI_GENERIC_LONG_WRITE 0x29
#define DSI_DCS_LONG_WRITE 0x39
#define DSI_GENERIC_SHORT_WRITE_1_PARAMS 0x13
#define DSI_GENERIC_SHORT_WRITE_2_PARAMS 0x23
#define DSI_DCS_WRITE_0_PARAM 0x05
#define DSI_DCS_WRITE_1_PARAM 0x15
#define DSI_DCS_READ_PARAM 0x06
#define DSI_DCS_COMPRESSION_MODE 0x07
#define DSI_DCS_PPS_LONG_WRITE 0x0A
#define DSI_DCS_SET_ADDR_MODE 0x36
#define DSI_DCS_EXIT_SLEEP_MODE 0x11
#define DSI_DCS_ENTER_SLEEP_MODE 0x10
#define DSI_DCS_SET_DISPLAY_ON 0x29
#define DSI_DCS_SET_DISPLAY_OFF 0x28
#define DSI_DCS_SET_TEARING_EFFECT_OFF 0x34
#define DSI_DCS_SET_TEARING_EFFECT_ON 0x35
#define DSI_DCS_NO_OP 0x0
#define DSI_NULL_PKT_NO_DATA 0x9
#define DSI_BLANKING_PKT_NO_DATA 0x19
#define DSI_DCS_SET_COMPRESSION_METHOD 0xC0
/* DCS commands for command mode */
#define DSI_ENTER_PARTIAL_MODE 0x12
#define DSI_SET_PIXEL_FORMAT 0x3A
#define DSI_AREA_COLOR_MODE 0x4C
#define DSI_SET_PARTIAL_AREA 0x30
#define DSI_SET_PAGE_ADDRESS 0x2B
#define DSI_SET_ADDRESS_MODE 0x36
#define DSI_SET_COLUMN_ADDRESS 0x2A
#define DSI_WRITE_MEMORY_START 0x2C
#define DSI_WRITE_MEMORY_CONTINUE 0x3C
#define PKT_ID0(id) ((((id) & 0x3f) << 3) | \
(((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN0(len) (((len) & 0x7) << 0)
#define PKT_ID1(id) ((((id) & 0x3f) << 13) | \
(((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN1(len) (((len) & 0x7) << 10)
#define PKT_ID2(id) ((((id) & 0x3f) << 23) | \
(((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN2(len) (((len) & 0x7) << 20)
#define PKT_ID3(id) ((((id) & 0x3f) << 3) | \
(((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN3(len) (((len) & 0x7) << 0)
#define PKT_ID4(id) ((((id) & 0x3f) << 13) | \
(((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN4(len) (((len) & 0x7) << 10)
#define PKT_ID5(id) ((((id) & 0x3f) << 23) | \
(((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN5(len) (((len) & 0x7) << 20)
#define PKT_LP (((DSI_ENABLE) & 0x1) << 30)
#define NUMOF_PKT_SEQ 12
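A sketch of how these packing macros are typically combined (the packet IDs and lengths below are invented for illustration): each 32-bit word of a pkt_seq[NUMOF_PKT_SEQ] table ORs together up to three ID/length pairs for one line of the sequence, optionally marked as low-power.

static const NvU32 example_pkt_seq_word =
    PKT_ID0(DSI_BLANKING_PKT_NO_DATA) | PKT_LEN0(0) |
    PKT_ID1(DSI_NULL_PKT_NO_DATA)     | PKT_LEN1(0) |
    PKT_ID2(DSI_BLANKING_PKT_NO_DATA) | PKT_LEN2(0) |
    PKT_LP;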
/* DSI pixel data format, enum values should match with dt-bindings in tegra-panel.h */
typedef enum
{
DSI_PIXEL_FORMAT_16BIT_P,
DSI_PIXEL_FORMAT_18BIT_P,
DSI_PIXEL_FORMAT_18BIT_NP,
DSI_PIXEL_FORMAT_24BIT_P,
DSI_PIXEL_FORMAT_8BIT_DSC,
DSI_PIXEL_FORMAT_12BIT_DSC,
DSI_PIXEL_FORMAT_16BIT_DSC,
DSI_PIXEL_FORMAT_10BIT_DSC,
DSI_PIXEL_FORMAT_30BIT_P,
DSI_PIXEL_FORMAT_36BIT_P,
} DSIPIXELFORMAT;
/* DSI virtual channel number */
typedef enum
{
DSI_VIRTUAL_CHANNEL_0,
DSI_VIRTUAL_CHANNEL_1,
DSI_VIRTUAL_CHANNEL_2,
DSI_VIRTUAL_CHANNEL_3,
} DSIVIRTUALCHANNEL;
/* DSI transmit method for video data */
typedef enum
{
DSI_VIDEO_TYPE_VIDEO_MODE,
DSI_VIDEO_TYPE_COMMAND_MODE,
} DSIVIDEODATAMODE;
/* DSI HS clock mode */
typedef enum
{
DSI_VIDEO_CLOCK_CONTINUOUS,
DSI_VIDEO_CLOCK_TX_ONLY,
} DSICLOCKMODE;
/* DSI burst mode setting in video mode. Each mode is assigned with a
* fixed value. The rationale behind this is to avoid change of these
* values, since the calculation of dsi clock depends on them. */
typedef enum
{
DSI_VIDEO_NON_BURST_MODE = 0,
DSI_VIDEO_NON_BURST_MODE_WITH_SYNC_END = 1,
DSI_VIDEO_BURST_MODE_LOWEST_SPEED = 2,
DSI_VIDEO_BURST_MODE_LOW_SPEED = 3,
DSI_VIDEO_BURST_MODE_MEDIUM_SPEED = 4,
DSI_VIDEO_BURST_MODE_FAST_SPEED = 5,
DSI_VIDEO_BURST_MODE_FASTEST_SPEED = 6,
} DSIVIDEOBURSTMODE;
/* DSI Ganged Mode */
typedef enum
{
DSI_GANGED_SYMMETRIC_LEFT_RIGHT = 1,
DSI_GANGED_SYMMETRIC_EVEN_ODD = 2,
DSI_GANGED_SYMMETRIC_LEFT_RIGHT_OVERLAP = 3,
} DSIGANGEDTYPE;
typedef enum
{
DSI_LINK0,
DSI_LINK1,
} DSILINKNUM;
/* DSI Command Packet type */
typedef enum
{
DSI_PACKET_CMD,
DSI_DELAY_MS,
DSI_GPIO_SET,
DSI_SEND_FRAME,
DSI_PACKET_VIDEO_VBLANK_CMD,
DSI_DELAY_US,
} DSICMDPKTTYPE;
/* DSI Phy type */
typedef enum
{
DSI_DPHY,
DSI_CPHY,
} DSIPHYTYPE;
enum {
DSI_GPIO_LCD_RESET,
DSI_GPIO_PANEL_EN,
DSI_GPIO_PANEL_EN_1,
DSI_GPIO_BL_ENABLE,
DSI_GPIO_BL_PWM,
DSI_GPIO_AVDD_AVEE_EN,
DSI_GPIO_VDD_1V8_LCD_EN,
DSI_GPIO_TE,
DSI_GPIO_BRIDGE_EN_0,
DSI_GPIO_BRIDGE_EN_1,
DSI_GPIO_BRIDGE_REFCLK_EN,
DSI_N_GPIO_PANEL, /* add new gpio above this entry */
};
enum
{
DSI_DISABLE,
DSI_ENABLE,
};
typedef struct
{
NvU8 cmd_type;
NvU8 data_id;
union
{
NvU16 data_len;
NvU16 delay_ms;
NvU16 delay_us;
NvU32 gpio;
NvU16 frame_cnt;
struct
{
NvU8 data0;
NvU8 data1;
} sp;
} sp_len_dly;
NvU32 *pdata;
NvU8 link_id;
NvBool club_cmd;
} DSI_CMD, *PDSICMD;
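A hypothetical init-command table using the structure above (the opcodes come from this header; the delay value and the sequence itself are invented, as real panels define their own tables):

static DSI_CMD example_init_cmds[] = {
    /* DCS short write, no parameters: exit sleep mode. */
    { .cmd_type = DSI_PACKET_CMD,
      .data_id  = DSI_DCS_WRITE_0_PARAM,
      .sp_len_dly = { .sp = { .data0 = DSI_DCS_EXIT_SLEEP_MODE, .data1 = 0 } } },

    /* Give the panel time to wake up (value is illustrative). */
    { .cmd_type = DSI_DELAY_MS,
      .sp_len_dly = { .delay_ms = 120 } },

    /* DCS short write, no parameters: turn the display on. */
    { .cmd_type = DSI_PACKET_CMD,
      .data_id  = DSI_DCS_WRITE_0_PARAM,
      .sp_len_dly = { .sp = { .data0 = DSI_DCS_SET_DISPLAY_ON, .data1 = 0 } } },
};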
typedef struct
{
NvU16 t_hsdexit_ns;
NvU16 t_hstrail_ns;
NvU16 t_datzero_ns;
NvU16 t_hsprepare_ns;
NvU16 t_hsprebegin_ns;
NvU16 t_hspost_ns;
NvU16 t_clktrail_ns;
NvU16 t_clkpost_ns;
NvU16 t_clkzero_ns;
NvU16 t_tlpx_ns;
NvU16 t_clkprepare_ns;
NvU16 t_clkpre_ns;
NvU16 t_wakeup_ns;
NvU16 t_taget_ns;
NvU16 t_tasure_ns;
NvU16 t_tago_ns;
} DSI_PHY_TIMING_IN_NS;
typedef struct
{
NvU32 hActive;
NvU32 vActive;
NvU32 hFrontPorch;
NvU32 vFrontPorch;
NvU32 hBackPorch;
NvU32 vBackPorch;
NvU32 hSyncWidth;
NvU32 vSyncWidth;
NvU32 hPulsePolarity;
NvU32 vPulsePolarity;
NvU32 pixelClkRate;
} DSITIMINGS, *PDSITIMINGS;
typedef struct
{
NvU8 n_data_lanes; /* required */
NvU8 pixel_format; /* required */
NvU8 refresh_rate; /* required */
NvU8 rated_refresh_rate;
NvU8 panel_reset; /* required */
NvU8 virtual_channel; /* required */
NvU8 dsi_instance;
NvU16 dsi_panel_rst_gpio;
NvU16 dsi_panel_bl_en_gpio;
NvU16 dsi_panel_bl_pwm_gpio;
NvU16 even_odd_split_width;
NvU8 controller_vs;
NvBool panel_has_frame_buffer; /* required*/
/* Deprecated. Use DSI_SEND_FRAME panel command instead. */
NvBool panel_send_dc_frames;
DSI_CMD *dsi_init_cmd; /* required */
NvU16 n_init_cmd; /* required */
NvU32 *dsi_init_cmd_array;
NvU32 init_cmd_array_size;
NvBool sendInitCmdsEarly;
DSI_CMD *dsi_early_suspend_cmd;
NvU16 n_early_suspend_cmd;
NvU32 *dsi_early_suspend_cmd_array;
NvU32 early_suspend_cmd_array_size;
DSI_CMD *dsi_late_resume_cmd;
NvU16 n_late_resume_cmd;
NvU32 *dsi_late_resume_cmd_array;
NvU32 late_resume_cmd_array_size;
DSI_CMD *dsi_postvideo_cmd;
NvU16 n_postvideo_cmd;
NvU32 *dsi_postvideo_cmd_array;
NvU32 postvideo_cmd_array_size;
DSI_CMD *dsi_suspend_cmd; /* required */
NvU16 n_suspend_cmd; /* required */
NvU32 *dsi_suspend_cmd_array;
NvU32 suspend_cmd_array_size;
NvU8 video_data_type; /* required */
NvU8 video_clock_mode;
NvU8 video_burst_mode;
NvU8 ganged_type;
NvU16 ganged_overlap;
NvBool ganged_swap_links;
NvBool ganged_write_to_all_links;
NvU8 split_link_type;
NvU8 suspend_aggr;
NvU16 panel_buffer_size_byte;
NvU16 panel_reset_timeout_msec;
NvBool hs_cmd_mode_supported;
NvBool hs_cmd_mode_on_blank_supported;
NvBool enable_hs_clock_on_lp_cmd_mode;
NvBool no_pkt_seq_eot; /* 1st generation panel may not
* support eot. Don't set it for
* most panels.*/
const NvU32 *pktSeq;
NvU32 *pktSeq_array;
NvU32 pktSeq_array_size;
NvBool skip_dsi_pkt_header;
NvBool power_saving_suspend;
NvBool suspend_stop_stream_late;
NvBool dsi2lvds_bridge_enable;
NvBool dsi2edp_bridge_enable;
NvU32 max_panel_freq_khz;
NvU32 lp_cmd_mode_freq_khz;
NvU32 lp_read_cmd_mode_freq_khz;
NvU32 hs_clk_in_lp_cmd_mode_freq_khz;
NvU32 burst_mode_freq_khz;
NvU32 fpga_freq_khz;
NvU32 te_gpio;
NvBool te_polarity_low;
NvBool dsiEnVRR;
NvBool dsiVrrPanelSupportsTe;
NvBool dsiForceSetTePin;
int panel_gpio[DSI_N_GPIO_PANEL];
NvBool panel_gpio_populated;
NvU32 dpd_dsi_pads;
DSI_PHY_TIMING_IN_NS phyTimingNs;
NvU8 *bl_name;
NvBool lp00_pre_panel_wakeup;
NvBool ulpm_not_supported;
NvBool use_video_host_fifo_for_cmd;
NvBool dsi_csi_loopback;
NvBool set_max_timeout;
NvBool use_legacy_dphy_core;
// Swap P/N pins polarity of all data lanes
NvBool swap_data_lane_polarity;
// Swap P/N pins polarity of clock lane
NvBool swap_clock_lane_polarity;
// Reverse clock polarity for partition A/B. 1st SOT bit goes on negedge of Clock lane
NvBool reverse_clock_polarity;
// DSI Lane Crossbar. Allocating xbar array for max number of lanes
NvBool lane_xbar_exists;
NvU32 lane_xbar_ctrl[8];
NvU32 refresh_rate_adj;
NvU8 dsiPhyType;
NvBool en_data_scrambling;
NvU32 dsipll_vco_rate_hz;
NvU32 dsipll_clkoutpn_rate_hz;
NvU32 dsipll_clkouta_rate_hz;
NvU32 vpll0_rate_hz;
DSITIMINGS dsiTimings;
// DSC Parameters
NvBool dsiDscEnable;
NvU32 dsiDscBpp;
NvU32 dsiDscNumSlices;
NvU32 dsiDscSliceWidth;
NvU32 dsiDscSliceHeight;
NvBool dsiDscEnBlockPrediction;
NvBool dsiDscEnDualDsc;
NvU32 dsiDscDecoderMajorVersion;
NvU32 dsiDscDecoderMinorVersion;
NvBool dsiDscUseCustomPPS;
NvU32 dsiDscCustomPPSData[32];
// Driver allocates memory for PPS cmd to be sent to Panel
NvBool ppsCmdMemAllocated;
} DSI_PANEL_INFO;
#endif

View File

@@ -0,0 +1,32 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _OS_GPIO_H_
#define _OS_GPIO_H_
typedef enum
{
NV_OS_GPIO_FUNC_HOTPLUG_A,
NV_OS_GPIO_FUNC_HOTPLUG_B,
} NV_OS_GPIO_FUNC_NAMES;
#endif

View File

@@ -81,9 +81,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);

View File

@@ -662,27 +662,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
;;
hash__remap_4k_pfn)
#
# Determine if the hash__remap_4k_pfn() function is
# present.
#
# Added by commit 6cc1a0ee4ce2 ("powerpc/mm/radix: Add radix
# callback for pmd accessors") in v4.7 (committed 2016-04-29).
# Present only in arch/powerpc
#
CODE="
#if defined(NV_ASM_BOOK3S_64_HASH_64K_H_PRESENT)
#include <linux/mm.h>
#include <asm/book3s/64/hash-64k.h>
#endif
void conftest_hash__remap_4k_pfn(void) {
hash__remap_4k_pfn();
}"
compile_check_conftest "$CODE" "NV_HASH__REMAP_4K_PFN_PRESENT" "" "functions"
;;
register_cpu_notifier)
#
# Determine if register_cpu_notifier() is present
@@ -1633,7 +1612,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PHYS_TO_DMA_PRESENT" "" "functions"
;;
dma_attr_macros)
#
# Determine if the NV_DMA_ATTR_SKIP_CPU_SYNC_PRESENT macro present.
@@ -2441,6 +2419,45 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
;;
drm_plane_create_color_properties)
#
# Determine if the function drm_plane_create_color_properties() is
# present.
#
# Added by commit 80f690e9e3a6 ("drm: Add optional COLOR_ENCODING
# and COLOR_RANGE properties to drm_plane") in v4.17 (2018-02-19).
#
CODE="
#include <linux/types.h>
#if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
#include <drm/drm_color_mgmt.h>
#endif
void conftest_drm_plane_create_color_properties(void) {
drm_plane_create_color_properties();
}"
compile_check_conftest "$CODE" "NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT" "" "functions"
;;
drm_format_info_has_is_yuv)
#
# Determine if struct drm_format_info has .is_yuv member.
#
# Added by commit ce2d54619a10 ("drm/fourcc: Add is_yuv field to
# drm_format_info to denote if format is yuv") in v4.19
# (2018-07-17).
#
CODE="
#if defined(NV_DRM_DRM_FOURCC_H_PRESENT)
#include <drm/drm_fourcc.h>
#endif
int conftest_drm_format_info_has_is_yuv(void) {
return offsetof(struct drm_format_info, is_yuv);
}"
compile_check_conftest "$CODE" "NV_DRM_FORMAT_INFO_HAS_IS_YUV" "" "types"
;;
pci_stop_and_remove_bus_device)
#
# Determine if the pci_stop_and_remove_bus_device() function is present.
@@ -3132,6 +3149,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
;;
has_enum_pidtype_tgid)
# Determine if PIDTYPE_TGID is present in the kernel as an enum
#
# Added by commit 6883f81aac6f ("pid: Implement PIDTYPE_TGID")
# in v4.19
#
CODE="
#include <linux/pid.h>
enum pid_type type = PIDTYPE_TGID;
"
compile_check_conftest "$CODE" "NV_HAS_ENUM_PIDTYPE_TGID" "" "types"
;;
vfio_pin_pages_has_vfio_device_arg)
#
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@@ -3519,60 +3551,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VM_OPS_FAULT_REMOVED_VMA_ARG" "" "types"
;;
pnv_npu2_init_context)
#
# Determine if the pnv_npu2_init_context() function is
# present and the signature of its callback.
#
# Added by commit 1ab66d1fbada ("powerpc/powernv: Introduce
# address translation services for Nvlink2") in v4.12
# (2017-04-03).
#
echo "$CONFTEST_PREAMBLE
#if defined(NV_ASM_POWERNV_H_PRESENT)
#include <linux/pci.h>
#include <asm/powernv.h>
#endif
void conftest_pnv_npu2_init_context(void) {
pnv_npu2_init_context();
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#undef NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"
echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
rm -f conftest$$.o
return
fi
echo "#define NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"
# Check the callback signature
echo "$CONFTEST_PREAMBLE
#if defined(NV_ASM_POWERNV_H_PRESENT)
#include <linux/pci.h>
#include <asm/powernv.h>
#endif
struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
void (*cb)(struct npu_context *, void *),
void *priv) {
return NULL;
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#define NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
rm -f conftest$$.o
return
fi
echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
;;
of_get_ibm_chip_id)
#
# Determine if the of_get_ibm_chip_id() function is present.
@@ -5289,6 +5267,45 @@ compile_test() {
compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
;;
follow_pte_arg_vma)
#
# Determine if the first argument of follow_pte is
# mm_struct or vm_area_struct.
#
# The first argument was changed from mm_struct to vm_area_struct by
# commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
#
CODE="
#include <linux/mm.h>
typeof(follow_pte) conftest_follow_pte_has_vma_arg;
int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
unsigned long address,
pte_t **ptep,
spinlock_t **ptl) {
return 0;
}"
compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
;;
ptep_get)
#
# Determine if ptep_get() is present.
#
# ptep_get() was added by commit 481e980a7c19
# ("mm: Allow arches to provide ptep_get()")
#
CODE="
#include <linux/mm.h>
void conftest_ptep_get(void) {
ptep_get();
}"
compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
;;
drm_plane_atomic_check_has_atomic_state_arg)
#
# Determine if drm_plane_helper_funcs::atomic_check takes 'state'
@@ -5478,6 +5495,31 @@ compile_test() {
fi
;;
of_property_for_each_u32_has_internal_args)
#
# Determine if the internal arguments for the macro
# of_property_for_each_u32() are present.
#
# Commit 9722c3b66e21 ("of: remove internal arguments from
# of_property_for_each_u32()") removes two arguments from
# of_property_for_each_u32() which are used internally within
# the macro and so do not need to be passed. This change was
# made for Linux v6.11.
#
CODE="
#include <linux/of.h>
void conftest_of_property_for_each_u32(struct device_node *np,
char *propname) {
struct property *iparam1;
const __be32 *iparam2;
u32 val;
of_property_for_each_u32(np, propname, iparam1, iparam2, val);
}"
compile_check_conftest "$CODE" "NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS" "" "types"
;;
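For contrast, a hedged sketch of the post-change form (kernels that include commit 9722c3b66e21): the iterator keeps only the device node, property name, and value variable; the helper below is invented for illustration.

#include <linux/of.h>

static u32 example_sum_u32_property(struct device_node *np, const char *propname)
{
    u32 val, sum = 0;

    /* Three-argument form: the struct property and __be32 cursor are internal. */
    of_property_for_each_u32(np, propname, val)
        sum += val;

    return sum;
}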
of_property_read_variable_u8_array)
#
# Determine if of_property_read_variable_u8_array is present
@@ -5574,8 +5616,8 @@ compile_test() {
of_dma_configure)
#
# Determine if of_dma_configure() function is present, and how
# many arguments it takes.
# Determine if of_dma_configure() function is present, if it
# returns int, and how many arguments it takes.
#
# Added by commit 591c1ee465ce ("of: configure the platform
# device dma parameters") in v3.16. However, it was a static,
@@ -5585,6 +5627,10 @@ compile_test() {
# commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
# to help re-use") in v4.1.
#
# Its return type was changed from void to int by commit
# 7b07cbefb68d ("iommu: of: Handle IOMMU lookup failure with
# deferred probing or error") in v4.12.
#
# It subsequently began taking a third parameter with commit
# 3d6ce86ee794 ("drivers: remove force dma flag from buses")
# in v4.18.
@@ -5609,6 +5655,7 @@ compile_test() {
echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
else
echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
@@ -5627,6 +5674,26 @@ compile_test() {
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"
echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif
int conftest_of_dma_configure_has_int_return_type(void) {
return of_dma_configure(NULL, NULL, false);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
else
echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
fi
return
fi
@@ -5645,6 +5712,26 @@ compile_test() {
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"
echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif
int conftest_of_dma_configure_has_int_return_type(void) {
return of_dma_configure(NULL, NULL);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
else
echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
fi
return
fi
fi
@@ -7507,6 +7594,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
;;
page_pgmap)
#
# Determine if the page_pgmap() function is present.
#
# Added by commit 82ba975e4c43 ("mm: allow compound zone device
# pages") in v6.14
#
CODE="
#include <linux/mmzone.h>
int conftest_page_pgmap(void) {
return page_pgmap();
}"
compile_check_conftest "$CODE" "NV_PAGE_PGMAP_PRESENT" "" "functions"
;;
folio_test_swapcache)
#
# Determine if the folio_test_swapcache() function is present.
@@ -7523,6 +7626,34 @@ compile_test() {
compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
;;
platform_driver_struct_remove_returns_void)
#
# Determine if the 'platform_driver' structure 'remove' function
# pointer returns void.
#
# Commit 0edb555a65d1 ("platform: Make platform_driver::remove()
# return void") updated the platform_driver structure 'remove'
# callback to return void instead of int in Linux v6.11-rc1.
#
echo "$CONFTEST_PREAMBLE
#include <linux/platform_device.h>
int conftest_platform_driver_struct_remove_returns_void(struct platform_device *pdev,
struct platform_driver *driver) {
return driver->remove(pdev);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#undef NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
else
echo "#define NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
fi
;;
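A sketch (an assumption, not part of this conftest) of how a driver might consume the resulting define, selecting the remove() callback's return type at compile time:

#include <linux/platform_device.h>

#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)
static void example_platform_remove(struct platform_device *pdev)
{
    /* Device teardown goes here. */
}
#else
static int example_platform_remove(struct platform_device *pdev)
{
    /* Device teardown goes here. */
    return 0;
}
#endif

static struct platform_driver example_platform_driver = {
    .remove = example_platform_remove,
    .driver = { .name = "example" },
};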
module_import_ns_takes_constant)
#
# Determine if the MODULE_IMPORT_NS macro takes a string literal
@@ -7540,6 +7671,62 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
;;
assign_str)
#
# Determine whether the __assign_str() macro, used in tracepoint
# event definitions, has the 'src' parameter.
#
# The 'src' parameter was removed by commit 2c92ca849fcc
# ("tracing/treewide: Remove second parameter of __assign_str()") in
# v6.10.
#
# The expected usage of __assign_str() inside the TRACE_EVENT()
# macro, which involves multiple include passes and assumes it is
# in a header file, requires a non-standard conftest approach of
# producing both a header and a C file.
#
echo "$CONFTEST_PREAMBLE
#undef TRACE_SYSTEM
#define TRACE_SYSTEM conftest
#if !defined(_TRACE_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CONFTEST_H
#include <linux/tracepoint.h>
TRACE_EVENT(conftest,
TP_PROTO(const char *s),
TP_ARGS(s),
TP_STRUCT__entry(__string(s, s)),
TP_fast_assign(__assign_str(s);),
TP_printk(\"%s\", __get_str(s))
);
#endif
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE conftest$$
#include <trace/define_trace.h>
" > conftest$$.h
echo "$CONFTEST_PREAMBLE
#define CREATE_TRACE_POINTS
#include \"conftest$$.h\"
void conftest_assign_str(void) {
trace_conftest(\"conftest\");
}
" > conftest$$.c
$CC $CFLAGS -c conftest$$.c >/dev/null 2>&1
rm -f conftest$$.c conftest$$.h
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 1" | append_conftest "functions"
else
echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 2" | append_conftest "functions"
fi
;;
drm_driver_has_date)
#
@@ -7565,6 +7752,33 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
;;
drm_connector_helper_funcs_mode_valid_has_const_mode_arg)
#
# Determine if the 'mode' pointer argument is const in
# drm_connector_helper_funcs::mode_valid.
#
# The 'mode' pointer argument in
# drm_connector_helper_funcs::mode_valid was made const by commit
# 26d6fd81916e ("drm/connector: make mode_valid take a const struct
# drm_display_mode") in linux-next, expected in v6.15.
#
CODE="
#if defined(NV_DRM_DRM_ATOMIC_HELPER_H_PRESENT)
#include <drm/drm_atomic_helper.h>
#endif
static int conftest_drm_connector_mode_valid(struct drm_connector *connector,
const struct drm_display_mode *mode) {
return 0;
}
const struct drm_connector_helper_funcs conftest_drm_connector_helper_funcs = {
.mode_valid = conftest_drm_connector_mode_valid,
};"
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist

View File

@@ -14,6 +14,7 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_edid.h \
drm/drm_fbdev_generic.h \
drm/drm_fbdev_ttm.h \
drm/drm_client_setup.h \
@@ -65,13 +66,10 @@ NV_HEADER_PRESENCE_TESTS = \
linux/nvhost.h \
linux/nvhost_t194.h \
linux/host1x-next.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \
asm/powernv.h \
linux/atomic.h \
asm/barrier.h \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \

View File

@@ -62,6 +62,20 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && \
defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif
#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && \
defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif
#endif
#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))

View File

@@ -314,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
}
static int nv_drm_connector_mode_valid(struct drm_connector *connector,
#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
const struct drm_display_mode *mode)
#else
struct drm_display_mode *mode)
#endif
{
struct drm_device *dev = connector->dev;
struct nv_drm_device *nv_dev = to_nv_device(dev);

View File

@@ -372,23 +372,88 @@ cursor_plane_req_config_update(struct drm_plane *plane,
old_config.dstY != req_config->dstY;
}
static void free_drm_lut_surface(struct kref *ref)
static void release_drm_nvkms_surface(struct nv_drm_nvkms_surface *drm_nvkms_surface)
{
struct nv_drm_lut_surface *drm_lut_surface =
container_of(ref, struct nv_drm_lut_surface, refcount);
struct NvKmsKapiDevice *pDevice = drm_lut_surface->pDevice;
struct NvKmsKapiDevice *pDevice = drm_nvkms_surface->pDevice;
BUG_ON(drm_lut_surface->nvkms_surface == NULL);
BUG_ON(drm_lut_surface->nvkms_memory == NULL);
BUG_ON(drm_lut_surface->buffer == NULL);
BUG_ON(drm_nvkms_surface->nvkms_surface == NULL);
BUG_ON(drm_nvkms_surface->nvkms_memory == NULL);
BUG_ON(drm_nvkms_surface->buffer == NULL);
nvKms->destroySurface(pDevice, drm_lut_surface->nvkms_surface);
nvKms->unmapMemory(pDevice, drm_lut_surface->nvkms_memory,
nvKms->destroySurface(pDevice, drm_nvkms_surface->nvkms_surface);
nvKms->unmapMemory(pDevice, drm_nvkms_surface->nvkms_memory,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
drm_lut_surface->buffer);
nvKms->freeMemory(pDevice, drm_lut_surface->nvkms_memory);
drm_nvkms_surface->buffer);
nvKms->freeMemory(pDevice, drm_nvkms_surface->nvkms_memory);
}
nv_drm_free(drm_lut_surface);
static int init_drm_nvkms_surface(struct nv_drm_device *nv_dev,
struct nv_drm_nvkms_surface *drm_nvkms_surface,
struct nv_drm_nvkms_surface_params *surface_params)
{
struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
NvU8 compressible = 0; // No compression
struct NvKmsKapiCreateSurfaceParams params = {};
struct NvKmsKapiMemory *surface_mem;
struct NvKmsKapiSurface *surface;
void *buffer;
params.format = surface_params->format;
params.width = surface_params->width;
params.height = surface_params->height;
/* Allocate displayable memory. */
if (nv_dev->hasVideoMemory) {
surface_mem =
nvKms->allocateVideoMemory(pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
surface_params->surface_size,
&compressible);
} else {
surface_mem =
nvKms->allocateSystemMemory(pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
surface_params->surface_size,
&compressible);
}
if (surface_mem == NULL) {
return -ENOMEM;
}
/* Map memory in order to populate it. */
if (!nvKms->mapMemory(pDevice, surface_mem,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
&buffer)) {
nvKms->freeMemory(pDevice, surface_mem);
return -ENOMEM;
}
params.planes[0].memory = surface_mem;
params.planes[0].offset = 0;
params.planes[0].pitch = surface_params->surface_size;
/* Create surface. */
surface = nvKms->createSurface(pDevice, &params);
if (surface == NULL) {
nvKms->unmapMemory(pDevice, surface_mem,
NVKMS_KAPI_MAPPING_TYPE_KERNEL, buffer);
nvKms->freeMemory(pDevice, surface_mem);
return -ENOMEM;
}
/* Pack into struct nv_drm_nvkms_surface. */
drm_nvkms_surface->pDevice = pDevice;
drm_nvkms_surface->nvkms_memory = surface_mem;
drm_nvkms_surface->nvkms_surface = surface;
drm_nvkms_surface->buffer = buffer;
/* Init refcount. */
kref_init(&drm_nvkms_surface->refcount);
return 0;
}
static struct nv_drm_lut_surface *alloc_drm_lut_surface(
@@ -399,86 +464,49 @@ static struct nv_drm_lut_surface *alloc_drm_lut_surface(
NvU32 num_vss_header_entries,
NvU32 num_entries)
{
struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
struct nv_drm_lut_surface *drm_lut_surface;
NvU8 compressible = 0; // No compression
size_t size =
const size_t surface_size =
(((num_vss_header_entries + num_entries) *
NVKMS_LUT_CAPS_LUT_ENTRY_SIZE) + 255) & ~255; // 256-byte aligned
struct NvKmsKapiMemory *surface_mem;
struct NvKmsKapiSurface *surface;
struct NvKmsKapiCreateSurfaceParams params = {};
NvU16 *lut_data;
struct nv_drm_nvkms_surface_params params = {};
/* Allocate displayable memory. */
if (nv_dev->hasVideoMemory) {
surface_mem =
nvKms->allocateVideoMemory(pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
size,
&compressible);
} else {
surface_mem =
nvKms->allocateSystemMemory(pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
size,
&compressible);
}
if (surface_mem == NULL) {
return NULL;
}
/* Map memory in order to populate it. */
if (!nvKms->mapMemory(pDevice, surface_mem,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
(void **) &lut_data)) {
nvKms->freeMemory(pDevice, surface_mem);
return NULL;
}
/* Create surface. */
params.format = NvKmsSurfaceMemoryFormatR16G16B16A16;
params.width = num_vss_header_entries + num_entries;
params.height = 1;
params.planes[0].memory = surface_mem;
params.planes[0].offset = 0;
params.planes[0].pitch = size;
params.surface_size = surface_size;
surface = nvKms->createSurface(pDevice, &params);
if (surface == NULL) {
nvKms->unmapMemory(pDevice, surface_mem,
NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
nvKms->freeMemory(pDevice, surface_mem);
return NULL;
}
/* Pack into struct nv_drm_lut_surface. */
drm_lut_surface = nv_drm_calloc(1, sizeof(struct nv_drm_lut_surface));
if (drm_lut_surface == NULL) {
nvKms->destroySurface(pDevice, surface);
nvKms->unmapMemory(pDevice, surface_mem,
NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
nvKms->freeMemory(pDevice, surface_mem);
return NULL;
}
drm_lut_surface->pDevice = pDevice;
drm_lut_surface->nvkms_memory = surface_mem;
drm_lut_surface->nvkms_surface = surface;
drm_lut_surface->buffer = lut_data;
if (init_drm_nvkms_surface(nv_dev, &drm_lut_surface->base, &params) != 0) {
nv_drm_free(drm_lut_surface);
return NULL;
}
drm_lut_surface->properties.vssSegments = num_vss_header_segments;
drm_lut_surface->properties.vssType = vss_type;
drm_lut_surface->properties.lutEntries = num_entries;
drm_lut_surface->properties.entryFormat = entry_format;
/* Init refcount. */
kref_init(&drm_lut_surface->refcount);
return drm_lut_surface;
}
static void free_drm_lut_surface(struct kref *ref)
{
struct nv_drm_nvkms_surface *drm_nvkms_surface =
container_of(ref, struct nv_drm_nvkms_surface, refcount);
struct nv_drm_lut_surface *drm_lut_surface =
container_of(drm_nvkms_surface, struct nv_drm_lut_surface, base);
// Clean up base
release_drm_nvkms_surface(drm_nvkms_surface);
nv_drm_free(drm_lut_surface);
}
static NvU32 fp32_lut_interp(
NvU16 entry0,
NvU16 entry1,
@@ -582,7 +610,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_vss(
return NULL;
}
lut_data = (NvU16 *) drm_lut_surface->buffer;
lut_data = (NvU16 *) drm_lut_surface->base.buffer;
/* Calculate VSS header. */
if (vss_header_seg_sizes != NULL) {
@@ -733,7 +761,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_legacy(
return NULL;
}
lut_data = (NvU16 *) drm_lut_surface->buffer;
lut_data = (NvU16 *) drm_lut_surface->base.buffer;
/* Fill LUT surface. */
for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -799,7 +827,7 @@ static struct nv_drm_lut_surface *create_drm_tmo_surface(
return NULL;
}
lut_data = (NvU16 *) drm_lut_surface->buffer;
lut_data = (NvU16 *) drm_lut_surface->base.buffer;
/* Calculate linear VSS header. */
for (entry_idx = 0; entry_idx < NUM_VSS_HEADER_ENTRIES; entry_idx++) {
@@ -901,7 +929,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_vss(
return NULL;
}
lut_data = (NvU16 *) drm_lut_surface->buffer;
lut_data = (NvU16 *) drm_lut_surface->base.buffer;
/* Calculate VSS header. */
if (vss_header_seg_sizes != NULL) {
@@ -1021,7 +1049,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_legacy(
return NULL;
}
lut_data = (NvU16 *) drm_lut_surface->buffer;
lut_data = (NvU16 *) drm_lut_surface->base.buffer;
/* Fill LUT surface. */
for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -1057,6 +1085,74 @@ update_matrix_override(struct drm_property_blob *blob,
return enabled;
}
static enum NvKmsInputColorSpace nv_get_nvkms_input_colorspace(
enum nv_drm_input_color_space colorSpace)
{
switch (colorSpace) {
case NV_DRM_INPUT_COLOR_SPACE_NONE:
return NVKMS_INPUT_COLOR_SPACE_NONE;
case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
return NVKMS_INPUT_COLOR_SPACE_BT709;
case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
return NVKMS_INPUT_COLOR_SPACE_BT2100;
default:
/* We shouldn't hit this */
WARN_ON("Unsupported input colorspace");
return NVKMS_INPUT_COLOR_SPACE_NONE;
}
}
static enum NvKmsInputTf nv_get_nvkms_input_tf(
enum nv_drm_input_color_space colorSpace)
{
switch (colorSpace) {
case NV_DRM_INPUT_COLOR_SPACE_NONE:
return NVKMS_INPUT_TF_LINEAR;
case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
return NVKMS_INPUT_TF_LINEAR;
case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
return NVKMS_INPUT_TF_PQ;
default:
/* We shouldn't hit this */
WARN_ON("Unsupported input colorspace");
return NVKMS_INPUT_TF_LINEAR;
}
}
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
static enum NvKmsInputColorSpace nv_drm_color_encoding_to_nvkms_colorspace(
enum drm_color_encoding color_encoding)
{
switch(color_encoding) {
case DRM_COLOR_YCBCR_BT601:
return NVKMS_INPUT_COLOR_SPACE_BT601;
case DRM_COLOR_YCBCR_BT709:
return NVKMS_INPUT_COLOR_SPACE_BT709;
case DRM_COLOR_YCBCR_BT2020:
return NVKMS_INPUT_COLOR_SPACE_BT2020;
default:
/* We shouldn't hit this */
WARN_ON("Unsupported DRM color_encoding");
return NVKMS_INPUT_COLOR_SPACE_NONE;
}
}
static enum NvKmsInputColorRange nv_drm_color_range_to_nvkms_color_range(
enum drm_color_range color_range)
{
switch(color_range) {
case DRM_COLOR_YCBCR_FULL_RANGE:
return NVKMS_INPUT_COLOR_RANGE_FULL;
case DRM_COLOR_YCBCR_LIMITED_RANGE:
return NVKMS_INPUT_COLOR_RANGE_LIMITED;
default:
/* We shouldn't hit this */
WARN_ON("Unsupported DRM color_range");
return NVKMS_INPUT_COLOR_RANGE_DEFAULT;
}
}
#endif
static int
plane_req_config_update(struct drm_plane *plane,
struct drm_plane_state *plane_state,
@@ -1190,8 +1286,37 @@ plane_req_config_update(struct drm_plane *plane,
nv_plane->defaultCompositionMode;
#endif
req_config->config.inputColorSpace =
nv_drm_plane_state->input_colorspace;
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
nv_drm_format_is_yuv(plane_state->fb->format->format)) {
if (nv_plane->supportsColorProperties) {
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
} else {
req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
}
req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
} else {
#endif
req_config->config.inputColorSpace =
nv_get_nvkms_input_colorspace(nv_drm_plane_state->input_colorspace);
req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
req_config->config.inputTf =
nv_get_nvkms_input_tf(nv_drm_plane_state->input_colorspace);
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
}
#endif
req_config->flags.inputTfChanged =
(old_config.inputTf != req_config->config.inputTf);
req_config->flags.inputColorSpaceChanged =
(old_config.inputColorSpace != req_config->config.inputColorSpace);
req_config->flags.inputColorRangeChanged =
(old_config.inputColorRange != req_config->config.inputColorRange);
req_config->config.syncParams.preSyncptSpecified = false;
req_config->config.syncParams.postSyncptRequested = false;
@@ -1240,10 +1365,10 @@ plane_req_config_update(struct drm_plane *plane,
switch (info_frame->eotf) {
case HDMI_EOTF_SMPTE_ST2084:
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
req_config->config.outputTf = NVKMS_OUTPUT_TF_PQ;
break;
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
req_config->config.tf =
req_config->config.outputTf =
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
break;
default:
@@ -1254,7 +1379,7 @@ plane_req_config_update(struct drm_plane *plane,
req_config->config.hdrMetadata.enabled = true;
} else {
req_config->config.hdrMetadata.enabled = false;
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
req_config->config.outputTf = NVKMS_OUTPUT_TF_NONE;
}
req_config->flags.hdrMetadataChanged =
@@ -1264,7 +1389,7 @@ plane_req_config_update(struct drm_plane *plane,
&req_config->config.hdrMetadata.val,
sizeof(struct NvKmsHDRStaticMetadata)));
req_config->flags.tfChanged = (old_config.tf != req_config->config.tf);
req_config->flags.outputTfChanged = (old_config.outputTf != req_config->config.outputTf);
#endif
req_config->config.matrixOverrides.enabled.lmsCtm =
@@ -1295,7 +1420,7 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->degamma_changed) {
if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
free_drm_lut_surface);
nv_drm_plane_state->degamma_drm_lut_surface = NULL;
}
@@ -1327,7 +1452,7 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
req_config->config.ilut.enabled = NV_TRUE;
req_config->config.ilut.lutSurface =
nv_drm_plane_state->degamma_drm_lut_surface->nvkms_surface;
nv_drm_plane_state->degamma_drm_lut_surface->base.nvkms_surface;
req_config->config.ilut.offset = 0;
req_config->config.ilut.vssSegments =
nv_drm_plane_state->degamma_drm_lut_surface->properties.vssSegments;
@@ -1346,7 +1471,7 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->tmo_changed) {
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
free_drm_lut_surface);
nv_drm_plane_state->tmo_drm_lut_surface = NULL;
}
@@ -1363,7 +1488,7 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
req_config->config.tmo.enabled = NV_TRUE;
req_config->config.tmo.lutSurface =
nv_drm_plane_state->tmo_drm_lut_surface->nvkms_surface;
nv_drm_plane_state->tmo_drm_lut_surface->base.nvkms_surface;
req_config->config.tmo.offset = 0;
req_config->config.tmo.vssSegments =
nv_drm_plane_state->tmo_drm_lut_surface->properties.vssSegments;
@@ -1870,7 +1995,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
nv_plane_state->degamma_drm_lut_surface =
nv_old_plane_state->degamma_drm_lut_surface;
if (nv_plane_state->degamma_drm_lut_surface) {
kref_get(&nv_plane_state->degamma_drm_lut_surface->refcount);
kref_get(&nv_plane_state->degamma_drm_lut_surface->base.refcount);
}
nv_plane_state->tmo_lut = nv_old_plane_state->tmo_lut;
@@ -1881,7 +2006,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
nv_plane_state->tmo_drm_lut_surface =
nv_old_plane_state->tmo_drm_lut_surface;
if (nv_plane_state->tmo_drm_lut_surface) {
kref_get(&nv_plane_state->tmo_drm_lut_surface->refcount);
kref_get(&nv_plane_state->tmo_drm_lut_surface->base.refcount);
}
return &nv_plane_state->base;
@@ -1909,13 +2034,13 @@ static inline void __nv_drm_plane_atomic_destroy_state(
nv_drm_property_blob_put(nv_drm_plane_state->degamma_lut);
if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
free_drm_lut_surface);
}
nv_drm_property_blob_put(nv_drm_plane_state->tmo_lut);
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
free_drm_lut_surface);
}
}
@@ -2113,7 +2238,7 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
}
nv_state->regamma_divisor = nv_old_state->regamma_divisor;
if (nv_state->regamma_drm_lut_surface) {
kref_get(&nv_state->regamma_drm_lut_surface->refcount);
kref_get(&nv_state->regamma_drm_lut_surface->base.refcount);
}
nv_state->regamma_changed = false;
@@ -2142,7 +2267,7 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
nv_drm_property_blob_put(nv_state->regamma_lut);
if (nv_state->regamma_drm_lut_surface != NULL) {
kref_put(&nv_state->regamma_drm_lut_surface->refcount,
kref_put(&nv_state->regamma_drm_lut_surface->base.refcount,
free_drm_lut_surface);
}
@@ -2386,7 +2511,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
if (nv_crtc_state->regamma_changed) {
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
kref_put(&nv_crtc_state->regamma_drm_lut_surface->refcount,
kref_put(&nv_crtc_state->regamma_drm_lut_surface->base.refcount,
free_drm_lut_surface);
nv_crtc_state->regamma_drm_lut_surface = NULL;
}
@@ -2417,7 +2542,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
req_config->modeSetConfig.olut.enabled = NV_TRUE;
req_config->modeSetConfig.olut.lutSurface =
nv_crtc_state->regamma_drm_lut_surface->nvkms_surface;
nv_crtc_state->regamma_drm_lut_surface->base.nvkms_surface;
req_config->modeSetConfig.olut.offset = 0;
req_config->modeSetConfig.olut.vssSegments =
nv_crtc_state->regamma_drm_lut_surface->properties.vssSegments;
@@ -2521,7 +2646,7 @@ static void nv_drm_plane_install_properties(
if (nv_dev->nv_input_colorspace_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_input_colorspace_property,
NVKMS_INPUT_COLORSPACE_NONE);
NV_DRM_INPUT_COLOR_SPACE_NONE);
}
if (supportsICtCp) {
@@ -2531,17 +2656,14 @@ static void nv_drm_plane_install_properties(
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
}
#endif
}
/*
* The old DRM_OBJECT_MAX_PROPERTY limit of 24 is too small to
* accomodate all of the properties for the ICtCp pipeline.
*
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
* accommodate new color props") in Linux v6.8 increased the limit to
* 64. To be safe, require this before attaching any properties for the
* ICtCp pipeline.
*/
if (DRM_OBJECT_MAX_PROPERTY >= 64) {
/*
* Per-plane HDR properties get us dangerously close to the 24 property
* limit on kernels that don't support NV_DRM_USE_EXTENDED_PROPERTIES.
*/
if (NV_DRM_USE_EXTENDED_PROPERTIES) {
if (supportsICtCp) {
if (nv_dev->nv_plane_lms_ctm_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_lms_ctm_property, 0);
@@ -2568,36 +2690,36 @@ static void nv_drm_plane_install_properties(
NVKMS_LUT_ARRAY_SIZE);
}
}
}
if (nv_dev->nv_plane_blend_ctm_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
}
if (nv_dev->nv_plane_blend_ctm_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
}
if (nv_plane->ilut_caps.supported) {
if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
if (nv_dev->nv_plane_degamma_tf_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_tf_property,
NV_DRM_TRANSFER_FUNCTION_DEFAULT);
if (nv_plane->ilut_caps.supported) {
if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
if (nv_dev->nv_plane_degamma_tf_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_tf_property,
NV_DRM_TRANSFER_FUNCTION_DEFAULT);
}
if (nv_dev->nv_plane_degamma_multiplier_property) {
/* Default to 1 in S31.32 Sign-Magnitude Format */
nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_multiplier_property,
nv_plane_state->degamma_multiplier);
}
}
if (nv_dev->nv_plane_degamma_multiplier_property) {
/* Default to 1 in S31.32 Sign-Magnitude Format */
nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
if (nv_dev->nv_plane_degamma_lut_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_multiplier_property,
nv_plane_state->degamma_multiplier);
&plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
}
if (nv_dev->nv_plane_degamma_lut_size_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_lut_size_property,
NVKMS_LUT_ARRAY_SIZE);
}
}
if (nv_dev->nv_plane_degamma_lut_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
}
if (nv_dev->nv_plane_degamma_lut_size_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_plane_degamma_lut_size_property,
NVKMS_LUT_ARRAY_SIZE);
}
}
}
@@ -2776,6 +2898,29 @@ nv_drm_plane_create(struct drm_device *dev,
goto failed_plane_init;
}
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if (pResInfo->caps.supportsInputColorSpace &&
pResInfo->caps.supportsInputColorRange) {
nv_plane->supportsColorProperties = true;
drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
} else {
nv_plane->supportsColorProperties = false;
}
#else
nv_plane->supportsColorProperties = false;
#endif
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
if (plane_type != DRM_PLANE_TYPE_CURSOR) {

View File

@@ -191,6 +191,13 @@ struct nv_drm_plane {
*/
uint32_t layer_idx;
/**
* @supportsColorProperties
*
* If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
*/
bool supportsColorProperties;
struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};
@@ -203,10 +210,23 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
return container_of(plane, struct nv_drm_plane, base);
}
struct nv_drm_lut_surface {
struct nv_drm_nvkms_surface {
struct NvKmsKapiDevice *pDevice;
struct NvKmsKapiMemory *nvkms_memory;
struct NvKmsKapiSurface *nvkms_surface;
void *buffer;
struct kref refcount;
};
struct nv_drm_nvkms_surface_params {
NvU32 width;
NvU32 height;
size_t surface_size;
enum NvKmsSurfaceMemoryFormat format;
};
struct nv_drm_lut_surface {
struct nv_drm_nvkms_surface base;
struct {
NvU32 vssSegments;
enum NvKmsLUTVssType vssType;
@@ -215,14 +235,12 @@ struct nv_drm_lut_surface {
enum NvKmsLUTFormat entryFormat;
} properties;
void *buffer;
struct kref refcount;
};
struct nv_drm_plane_state {
struct drm_plane_state base;
s32 __user *fd_user_ptr;
enum NvKmsInputColorSpace input_colorspace;
enum nv_drm_input_color_space input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property_blob *hdr_output_metadata;
#endif

View File

@@ -35,6 +35,8 @@
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-utils.h"
#include "nv_dpy_id.h"
#if defined(NV_DRM_AVAILABLE)
@@ -90,6 +92,7 @@
#include <linux/pci.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
/*
* Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h")
@@ -120,15 +123,15 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);
static struct nv_drm_device *dev_list = NULL;
static char* nv_get_input_colorspace_name(
enum NvKmsInputColorSpace colorSpace)
static const char* nv_get_input_colorspace_name(
enum nv_drm_input_color_space colorSpace)
{
switch (colorSpace) {
case NVKMS_INPUT_COLORSPACE_NONE:
case NV_DRM_INPUT_COLOR_SPACE_NONE:
return "None";
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
return "scRGB Linear FP16";
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
return "BT.2100 PQ";
default:
/* We shouldn't hit this */
@@ -284,6 +287,123 @@ done:
mutex_unlock(&nv_dev->lock);
}
struct nv_drm_mst_display_info {
NvKmsKapiDisplay handle;
NvBool isDpMST;
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH];
};
/*
* Helper function to query DP MST display info.
* dpMSTDisplayInfos is allocated dynamically, so the caller must free it
* once the query results are no longer needed.
*/
static int nv_drm_get_mst_display_infos
(
struct nv_drm_device *nv_dev,
NvKmsKapiDisplay hDisplay,
struct nv_drm_mst_display_info **dpMSTDisplayInfos,
NvU32 *nDynamicDisplays
)
{
struct NvKmsKapiStaticDisplayInfo *displayInfo = NULL;
struct NvKmsKapiStaticDisplayInfo *dynamicDisplayInfo = NULL;
struct NvKmsKapiConnectorInfo *connectorInfo = NULL;
struct nv_drm_mst_display_info *displayInfos = NULL;
NvU32 i = 0;
int ret = 0;
NVDpyId dpyId;
*nDynamicDisplays = 0;
/* Query NvKmsKapiStaticDisplayInfo and NvKmsKapiConnectorInfo */
if ((displayInfo = nv_drm_calloc(1, sizeof(*displayInfo))) == NULL) {
ret = -ENOMEM;
goto done;
}
if ((dynamicDisplayInfo = nv_drm_calloc(1, sizeof(*dynamicDisplayInfo))) == NULL) {
ret = -ENOMEM;
goto done;
}
if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice, hDisplay, displayInfo)) {
ret = -EINVAL;
goto done;
}
connectorInfo = nvkms_get_connector_info(nv_dev->pDevice,
displayInfo->connectorHandle);
if (IS_ERR(connectorInfo)) {
ret = PTR_ERR(connectorInfo);
goto done;
}
*nDynamicDisplays = nvCountDpyIdsInDpyIdList(connectorInfo->dynamicDpyIdList);
if (*nDynamicDisplays == 0) {
goto done;
}
if ((displayInfos = nv_drm_calloc(*nDynamicDisplays, sizeof(*displayInfos))) == NULL) {
ret = -ENOMEM;
goto done;
}
FOR_ALL_DPY_IDS(dpyId, connectorInfo->dynamicDpyIdList) {
if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice,
nvDpyIdToNvU32(dpyId),
dynamicDisplayInfo)) {
ret = -EINVAL;
nv_drm_free(displayInfos);
goto done;
}
displayInfos[i].handle = dynamicDisplayInfo->handle;
displayInfos[i].isDpMST = dynamicDisplayInfo->isDpMST;
memcpy(displayInfos[i].dpAddress, dynamicDisplayInfo->dpAddress, sizeof(dynamicDisplayInfo->dpAddress));
i++;
}
*dpMSTDisplayInfos = displayInfos;
done:
nv_drm_free(displayInfo);
nv_drm_free(dynamicDisplayInfo);
nv_drm_free(connectorInfo);
return ret;
}
static int nv_drm_disp_cmp (const void *l, const void *r)
{
struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;
return strcmp(l_info->dpAddress, r_info->dpAddress);
}
/*
* Helper function to sort dynamic displays by dpAddress, comparing the
* addresses as strings rather than numerically (e.g. "1.1" < "1.10" < "1.2"),
* so that DRM connector IDs are created in a deterministic order.
*/
static void nv_drm_sort_dynamic_displays_by_dp_addr
(
struct nv_drm_mst_display_info *infos,
int nDynamicDisplays
)
{
sort(infos, nDynamicDisplays, sizeof(*infos), nv_drm_disp_cmp, NULL);
}
/*
* Helper function to initialize drm_device::mode_config from
* NvKmsKapiDevice's resource information.
@@ -365,9 +485,11 @@ static void nv_drm_enumerate_encoders_and_connectors
nv_dev,
"Failed to enumurate NvKmsKapiDisplay handles");
} else {
NvU32 i;
NvU32 i, j;
NvU32 nDynamicDisplays = 0;
for (i = 0; i < nDisplays; i++) {
struct nv_drm_mst_display_info *displayInfos = NULL;
struct drm_encoder *encoder =
nv_drm_add_encoder(dev, hDisplays[i]);
@@ -377,6 +499,34 @@ static void nv_drm_enumerate_encoders_and_connectors
"Failed to add connector for NvKmsKapiDisplay 0x%08x",
hDisplays[i]);
}
if (nv_drm_get_mst_display_infos(nv_dev, hDisplays[i],
&displayInfos, &nDynamicDisplays)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to get dynamic displays");
} else if (nDynamicDisplays) {
nv_drm_sort_dynamic_displays_by_dp_addr(displayInfos, nDynamicDisplays);
for (j = 0; j < nDynamicDisplays; j++) {
if (displayInfos[j].isDpMST) {
struct drm_encoder *mst_encoder =
nv_drm_add_encoder(dev, displayInfos[j].handle);
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "found DP MST port display handle %u",
displayInfos[j].handle);
if (IS_ERR(mst_encoder)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to add connector for NvKmsKapiDisplay 0x%08x",
displayInfos[j].handle);
}
}
}
nv_drm_free(displayInfos);
}
}
}
@@ -602,6 +752,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
memset(&allocateDeviceParams, 0, sizeof(allocateDeviceParams));
allocateDeviceParams.gpuId = nv_dev->gpu_info.gpu_id;
allocateDeviceParams.migDevice = nv_dev->gpu_mig_device;
allocateDeviceParams.privateData = nv_dev;
allocateDeviceParams.eventCallback = nv_drm_event_callback;
@@ -672,6 +823,9 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;
nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
gen = nv_dev->pageKindGeneration;
kind = nv_dev->genericPageKind;
@@ -855,6 +1009,62 @@ static void nv_drm_master_set(struct drm_device *dev,
}
#endif
static
int nv_drm_reset_input_colorspace(struct drm_device *dev)
{
struct drm_atomic_state *state;
struct drm_plane_state *plane_state;
struct drm_plane *plane;
struct nv_drm_plane_state *nv_drm_plane_state;
struct drm_modeset_acquire_ctx ctx;
int ret = 0;
bool do_reset = false;
NvU32 flags = 0;
state = drm_atomic_state_alloc(dev);
if (!state)
return -ENOMEM;
#if defined(DRM_MODESET_ACQUIRE_INTERRUPTIBLE)
flags |= DRM_MODESET_ACQUIRE_INTERRUPTIBLE;
#endif
drm_modeset_acquire_init(&ctx, flags);
state->acquire_ctx = &ctx;
nv_drm_for_each_plane(plane, dev) {
plane_state = drm_atomic_get_plane_state(state, plane);
if (IS_ERR(plane_state)) {
ret = PTR_ERR(plane_state);
goto out;
}
nv_drm_plane_state = to_nv_drm_plane_state(plane_state);
if (nv_drm_plane_state) {
if (nv_drm_plane_state->input_colorspace != NV_DRM_INPUT_COLOR_SPACE_NONE) {
nv_drm_plane_state->input_colorspace = NV_DRM_INPUT_COLOR_SPACE_NONE;
do_reset = true;
}
}
}
if (do_reset) {
ret = drm_atomic_commit(state);
}
out:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
drm_atomic_state_put(state);
#else
// On success, drm_atomic_commit() takes care of cleaning up and freeing the state.
if (ret != 0) {
drm_atomic_state_free(state);
}
#endif
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
return ret;
}
#if defined(NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG)
static
@@ -898,6 +1108,12 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
drm_modeset_unlock_all(dev);
nvKms->releaseOwnership(nv_dev->pDevice);
} else {
int err = nv_drm_reset_input_colorspace(dev);
if (err != 0) {
NV_DRM_DEV_LOG_WARN(nv_dev,
"nv_drm_reset_input_colorspace failed with error code: %d !", err);
}
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
@@ -935,6 +1151,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
}
params->gpu_id = nv_dev->gpu_info.gpu_id;
params->mig_device = nv_dev->gpu_mig_device;
params->primary_index = dev->primary->index;
params->supports_alloc = false;
params->generic_page_kind = 0;
@@ -1725,7 +1942,7 @@ static const struct file_operations nv_drm_fops = {
.llseek = noop_llseek,
#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
#if defined(FOP_UNSIGNED_OFFSET)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
@@ -1967,16 +2184,16 @@ void nv_drm_update_drm_driver_features(void)
/*
* Helper function for allocate/register DRM device for given NVIDIA GPU ID.
*/
void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
{
struct nv_drm_device *nv_dev = NULL;
struct drm_device *dev = NULL;
struct device *device = gpu_info->os_device_ptr;
struct device *device = gpu_info->gpuInfo.os_device_ptr;
bool bus_is_pci;
DRM_DEBUG(
"Registering device for NVIDIA GPU ID 0x08%x",
gpu_info->gpu_id);
gpu_info->gpuInfo.gpu_id);
/* Allocate NVIDIA-DRM device */
@@ -1988,7 +2205,8 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
return;
}
nv_dev->gpu_info = *gpu_info;
nv_dev->gpu_info = gpu_info->gpuInfo;
nv_dev->gpu_mig_device = gpu_info->migDevice;
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
mutex_init(&nv_dev->lock);
@@ -2045,9 +2263,30 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
#endif
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
} else {
resource_size_t base = (resource_size_t) nv_dev->vtFbBaseAddress;
resource_size_t size = (resource_size_t) nv_dev->vtFbSize;
if (base > 0 && size > 0) {
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT)
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG)
drm_aperture_remove_conflicting_framebuffers(base, size, false, &nv_drm_driver);
#elif defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG)
drm_aperture_remove_conflicting_framebuffers(base, size, &nv_drm_driver);
#else
drm_aperture_remove_conflicting_framebuffers(base, size, false, nv_drm_driver.name);
#endif
#elif defined(NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT)
aperture_remove_conflicting_devices(base, size, nv_drm_driver.name);
#endif
} else {
NV_DRM_DEV_LOG_INFO(nv_dev, "Invalid framebuffer console info");
}
}
#if defined(NV_DRM_CLIENT_AVAILABLE)
drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
@@ -2078,7 +2317,7 @@ failed_drm_alloc:
#if defined(NV_LINUX)
int nv_drm_probe_devices(void)
{
nv_gpu_info_t *gpu_info = NULL;
struct NvKmsKapiGpuInfo *gpu_info = NULL;
NvU32 gpu_count = 0;
NvU32 i;

View File

@@ -27,13 +27,15 @@
#if defined(NV_DRM_AVAILABLE)
struct NvKmsKapiGpuInfo;
int nv_drm_probe_devices(void);
void nv_drm_remove_devices(void);
void nv_drm_suspend_resume(NvBool suspend);
void nv_drm_register_drm_device(const nv_gpu_info_t *);
void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *);
void nv_drm_update_drm_driver_features(void);

View File

@@ -319,7 +319,7 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
nv_encoder = get_nv_encoder_from_nvkms_display(dev, hDisplay);
if (nv_encoder != NULL) {
NV_DRM_DEV_LOG_ERR(
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Encoder with NvKmsKapiDisplay 0x%08x already exists.",
hDisplay);

View File

@@ -202,6 +202,43 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
params.explicit_layout = false;
}
/*
* XXX work around an invalid pitch assumption in DRM.
*
* The smallest pitch the display hardware allows is 256.
*
* If a DRM client allocates a 32x32 cursor surface through
* DRM_IOCTL_MODE_CREATE_DUMB, we'll correctly round the pitch to 256:
*
* pitch = round(32 (width) * 4 (Bpp), 256) = 256
*
* and then allocate an 8k surface:
*
* size = pitch * 32 (height) = 8192
*
* and report the rounded pitch and size back to the client through the
* struct drm_mode_create_dumb ioctl params.
*
* But when the DRM client passes that buffer object handle to
* DRM_IOCTL_MODE_CURSOR, the client has no way to specify the pitch. This
* path in drm:
*
* DRM_IOCTL_MODE_CURSOR
* drm_mode_cursor_ioctl()
* drm_mode_cursor_common()
* drm_mode_cursor_universal()
*
* will implicitly create a framebuffer from the buffer object, and compute
* the pitch as width x 4 bytes per pixel = 128, without aligning to our
* minimum pitch.
*
* Intercept this case and force the pitch back to 256.
*/
if ((params.width == 32) &&
(params.height == 32) &&
(params.planes[0].pitch == 128)) {
params.planes[0].pitch = 256;
}
/* Create NvKmsKapiSurface */
nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, &params);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1132,7 +1132,7 @@ static void __nv_drm_semsurf_fence_ctx_destroy(
*/
nv_drm_workthread_shutdown(&ctx->worker);
nv_drm_del_timer_sync(&ctx->timer);
nv_timer_delete_sync(&ctx->timer.kernel_timer);
/*
* The semaphore surface could still be sending callbacks, so it is still

View File

@@ -166,4 +166,37 @@ uint32_t *nv_drm_format_array_alloc(
return array;
}
bool nv_drm_format_is_yuv(u32 format)
{
#if defined(NV_DRM_FORMAT_INFO_HAS_IS_YUV)
const struct drm_format_info *format_info = drm_format_info(format);
return (format_info != NULL) && format_info->is_yuv;
#else
switch (format) {
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:
case DRM_FORMAT_NV24:
case DRM_FORMAT_NV42:
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV61:
case DRM_FORMAT_NV12:
case DRM_FORMAT_NV21:
#if defined(DRM_FORMAT_P210)
case DRM_FORMAT_P210:
#endif
#if defined(DRM_FORMAT_P010)
case DRM_FORMAT_P010:
#endif
#if defined(DRM_FORMAT_P012)
case DRM_FORMAT_P012:
#endif
return true;
default:
return false;
}
#endif
}
#endif

View File

@@ -38,6 +38,8 @@ uint32_t *nv_drm_format_array_alloc(
unsigned int *count,
const long unsigned int nvkms_format_mask);
bool nv_drm_format_is_yuv(u32 format);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* __NVIDIA_DRM_FORMAT_H__ */

View File

@@ -308,12 +308,12 @@ static int __nv_drm_nvkms_gem_obj_init(
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;
if (!nvKms->getMemoryPages(nv_dev->pDevice,
if (!nvKms->isVidmem(pMemory) &&
!nvKms->getMemoryPages(nv_dev->pDevice,
pMemory,
&pages,
&numPages) &&
!nvKms->isVidmem(pMemory)) {
/* GetMemoryPages may fail for vidmem allocations,
&numPages)) {
/* GetMemoryPages will fail for vidmem allocations,
* but it should not fail for sysmem allocations. */
NV_DRM_DEV_LOG_ERR(nv_dev,
"Failed to get memory pages for NvKmsKapiMemory 0x%p",

View File

@@ -69,6 +69,13 @@
#endif //NV_DRM_ROTATION_AVAILABLE
/*
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
* accommodate new color props") in Linux v6.8 increased the per-object
* property limit from 24 to 64.
*/
#define NV_DRM_USE_EXTENDED_PROPERTIES (DRM_OBJECT_MAX_PROPERTY >= 64)
/*
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
* (2017-09-26) and drm_dev_unref() is removed by

View File

@@ -182,6 +182,7 @@ struct drm_nvidia_gem_import_userspace_memory_params {
struct drm_nvidia_get_dev_info_params {
uint32_t gpu_id; /* OUT */
uint32_t mig_device; /* OUT */
uint32_t primary_index; /* OUT; the "card%d" value */
uint32_t supports_alloc; /* OUT */

View File

@@ -677,6 +677,33 @@ int nv_drm_atomic_commit(struct drm_device *dev,
"Flip event timeout on head %u", nv_crtc->head);
}
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
/*
* If the legacy LUT needs to be updated, ensure that the previous LUT
* update is complete first.
*/
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
!nonblock /* waitForCompletion */);
/* If checking the LUT notifier failed, assume no LUT notifier is set. */
if (!complete) {
if (nonblock) {
return -EBUSY;
} else {
/*
* checkLutNotifier should wait on the notifier in this
* case, so we should only get here if the wait timed out.
*/
NV_DRM_DEV_LOG_ERR(
nv_dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
}
#endif
}
#if defined(NV_DRM_ATOMIC_HELPER_SWAP_STATE_HAS_STALL_ARG)
@@ -803,6 +830,19 @@ int nv_drm_atomic_commit(struct drm_device *dev,
__nv_drm_handle_flip_event(nv_crtc);
}
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
true /* waitForCompletion */);
if (!complete) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
#endif
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -236,15 +236,6 @@ unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
return jiffies + msecs_to_jiffies(relative_timeout_ms);
}
bool nv_drm_del_timer_sync(nv_drm_timer *timer)
{
if (del_timer_sync(&timer->kernel_timer)) {
return true;
} else {
return false;
}
}
#if defined(NV_DRM_FENCE_AVAILABLE)
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -58,16 +58,6 @@ typedef struct nv_timer nv_drm_timer;
#error "Need to define kernel timer callback primitives for this OS"
#endif
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif
#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif
struct page;
/* Set to true when the atomic modeset feature is enabled. */
@@ -111,8 +101,6 @@ void nv_drm_timer_setup(nv_drm_timer *timer,
void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long relative_timeout_ms);
bool nv_drm_del_timer_sync(nv_drm_timer *timer);
unsigned long nv_drm_timer_now(void);
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms);

View File

@@ -85,8 +85,15 @@
DRM_DEBUG_DRIVER("[GPU ID 0x%08x] " __fmt, \
__dev->gpu_info.gpu_id, ##__VA_ARGS__)
enum nv_drm_input_color_space {
NV_DRM_INPUT_COLOR_SPACE_NONE,
NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR,
NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ
};
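/*
 * Minimal sketch (assumed NVKMS enum and constant names; only
 * NVKMS_INPUT_COLORSPACE_BT2100_PQ is visible in this change): how the
 * DRM-side enum typically maps onto the NVKMS input colorspace.
 */
#if 0
static enum NvKmsInputColorSpace nv_drm_input_colorspace_to_nvkms(
    enum nv_drm_input_color_space cs)
{
    switch (cs) {
    case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
        return NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR; /* assumed name */
    case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
        return NVKMS_INPUT_COLORSPACE_BT2100_PQ;
    case NV_DRM_INPUT_COLOR_SPACE_NONE:
    default:
        return NVKMS_INPUT_COLORSPACE_NONE;         /* assumed name */
    }
}
#endif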
struct nv_drm_device {
nv_gpu_info_t gpu_info;
MIGDeviceId gpu_mig_device;
struct drm_device *dev;
@@ -182,6 +189,9 @@ struct nv_drm_device {
struct drm_property *nv_crtc_regamma_divisor_property;
struct nv_drm_device *next;
NvU64 vtFbBaseAddress;
NvU64 vtFbSize;
};
static inline NvU32 nv_drm_next_display_semaphore(

View File

@@ -37,6 +37,8 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl___vma_start_write
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
@@ -65,6 +67,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
@@ -74,6 +77,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
@@ -133,6 +137,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
@@ -140,8 +146,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_info_has_is_yuv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -53,6 +53,7 @@
#include "nv-procfs.h"
#include "nv-kthread-q.h"
#include "nv-time.h"
#include "nv-timer.h"
#include "nv-lock.h"
#include "nv-chardev-numbers.h"
@@ -102,6 +103,11 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
MODULE_PARM_DESC(conceal_vrr_caps,
"Conceal all display VRR capabilities");
static bool conceal_vrr_caps = false;
module_param_named(conceal_vrr_caps, conceal_vrr_caps, bool, 0400);
/* Fail allocating the RM core channel for NVKMS using the i-th method (see
* FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
@@ -135,7 +141,12 @@ NvBool nvkms_test_fail_alloc_core_channel(
return NV_TRUE;
}
NvBool nvkms_conceal_vrr_caps(void)
{
return conceal_vrr_caps;
}
NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;
@@ -738,7 +749,7 @@ static void nvkms_kthread_q_callback(void *arg)
* pending timers and than waiting for workqueue callbacks.
*/
if (timer->kernel_timer_created) {
del_timer_sync(&timer->kernel_timer);
nv_timer_delete_sync(&timer->kernel_timer);
}
/*
@@ -1922,7 +1933,11 @@ restart:
* completion, and we wait for queue completion with
* nv_kthread_q_stop below.
*/
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
if (timer_delete_sync(&timer->kernel_timer) == 1) {
#else
if (del_timer_sync(&timer->kernel_timer) == 1) {
#endif
/* We've deactivated timer so we need to clean after it */
list_del(&timer->timers_list);
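/*
 * Minimal compat sketch (an assumption, not part of this change): the
 * nv_timer_delete_sync() helper pulled in via nv-timer.h above presumably
 * wraps the same kernel-version check, so open-coded call sites like the
 * conditional here could be expressed as:
 */
#if 0
static inline int nv_compat_timer_delete_sync(struct timer_list *t)
{
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
    /* timer_delete_sync() is the newer name for del_timer_sync(). */
    return timer_delete_sync(t);
#else
    return del_timer_sync(t);
#endif
}
#endif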

View File

@@ -110,6 +110,7 @@ enum FailAllocCoreChannelMethod {
};
NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
NvBool nvkms_conceal_vrr_caps(void);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);

View File

@@ -52,7 +52,7 @@ nvidia-modeset-y += $(NVIDIA_MODESET_BINARY_OBJECT_O)
# Define nvidia-modeset.ko-specific CFLAGS.
#
NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset
NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset -I$(src)/common/inc
NVIDIA_MODESET_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
# Some Android kernels prohibit driver use of filesystem functions like
@@ -103,4 +103,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg

View File

@@ -453,35 +453,19 @@ typedef struct nvidia_p2p_rsync_reg_info {
/*
* @brief
* Gets rsync (GEN-ID) register information associated with the supported
* NPUs.
*
* The caller would use the returned information {GPU device, NPU device,
* socket-id, cluster-id} to pick the optimal generation registers to issue
* RSYNC (NVLink HW flush).
*
* The interface allocates structures to return the information, hence
* nvidia_p2p_put_rsync_registers() must be called to free the structures.
*
* Note, cluster-id is hardcoded to zero as early system configurations would
* only support cluster mode i.e. all devices would share the same cluster-id
* (0). In the future, appropriate kernel support would be needed to query
* cluster-ids.
*
* @param[out] reg_info
* A pointer to the rsync reg info structure.
* This interface is no longer supported and will always return an error. It
* is left in place (for now) to allow third-party callers to build without
* any errors.
*
* @Returns
* 0 Upon successful completion. Otherwise, returns negative value.
* -ENODEV
*/
int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info);
/*
* @brief
* Frees the structures allocated by nvidia_p2p_get_rsync_registers().
*
* @param[in] reg_info
* A pointer to the rsync reg info structure.
* This interface is no longer supported. It is left in place (for now) to
* allow third-party callers to build without any errors.
*/
void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info);
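/*
 * Minimal caller sketch (illustrative only): with this change the query
 * always fails with -ENODEV, so third-party code should treat that as
 * "no rsync registers available" rather than a fatal error.
 */
#if 0
static int example_query_rsync_registers(void)
{
    nvidia_p2p_rsync_reg_info_t *info = NULL;
    int ret = nvidia_p2p_get_rsync_registers(&info);

    if (ret != 0) {
        return 0; /* Always taken now; the interface returns -ENODEV. */
    }

    /* Unreachable with current drivers, kept for completeness. */
    nvidia_p2p_put_rsync_registers(info);
    return 0;
}
#endif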

View File

@@ -1,51 +1,31 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc86f_h_
#define _clc86f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class HOPPER_CHANNEL_GPFIFO */
/*
* Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Note there is no .mfs file for this class.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_clc86f_h__
#define __gh100_clc86f_h__
#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)
#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
@@ -64,54 +44,7 @@ typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored05[0x5c];
} Nvc86fControl, HopperAControlGPFifo;
/* fields and values */
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
#define NVC86F_SET_OBJECT (0x00000000)
#define NVC86F_SET_OBJECT_NVCLASS 15:0
#define NVC86F_SET_OBJECT_ENGINE 20:16
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC86F_ILLEGAL (0x00000004)
#define NVC86F_ILLEGAL_HANDLE 31:0
#define NVC86F_NOP (0x00000008)
#define NVC86F_NOP_HANDLE 31:0
#define NVC86F_SEMAPHOREA (0x00000010)
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC86F_SEMAPHOREB (0x00000014)
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC86F_SEMAPHOREC (0x00000018)
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
#define NVC86F_SEMAPHORED (0x0000001C)
#define NVC86F_SEMAPHORED_OPERATION 4:0
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION 30:27
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC86F_SEMAPHORED_FORMAT 31:31
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC86F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
@@ -206,67 +139,31 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
#define NVC86F_SET_REFERENCE (0x00000050)
#define NVC86F_SET_REFERENCE_COUNT 31:0
#define NVC86F_SEM_ADDR_LO (0x0000005c)
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
#define NVC86F_SEM_ADDR_HI (0x00000060)
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC86F_SEM_EXECUTE (0x0000006c)
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC86F_WFI (0x00000078)
#define NVC86F_WFI_SCOPE 0:0
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC86F_WFI_SCOPE_ALL 0x00000001
#define NVC86F_YIELD (0x00000080)
#define NVC86F_YIELD_OP 1:0
#define NVC86F_YIELD_OP_NOP 0x00000000
#define NVC86F_YIELD_OP_TSG 0x00000003
#define NVC86F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC86F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC86F_GP_ENTRY__SIZE 8
@@ -291,85 +188,4 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
/* dma method formats */
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC86F_DMA_METHOD_ADDRESS 11:0
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC86F_DMA_TERT_OP 17:16
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
#define NVC86F_DMA_METHOD_COUNT 28:16
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_SEC_OP 31:29
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC86F_DMA_INCR_ADDRESS 11:0
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
#define NVC86F_DMA_INCR_COUNT 28:16
#define NVC86F_DMA_INCR_OPCODE 31:29
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC86F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_NONINCR_COUNT 28:16
#define NVC86F_DMA_NONINCR_OPCODE 31:29
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC86F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_ONEINCR_COUNT 28:16
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC86F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC86F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC86F_DMA_IMMD_ADDRESS 11:0
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_IMMD_OPCODE 31:29
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC86F_DMA_ADDRESS 12:2
#define NVC86F_DMA_SUBCH 15:13
#define NVC86F_DMA_OPCODE3 17:16
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
#define NVC86F_DMA_COUNT 28:18
#define NVC86F_DMA_OPCODE 31:29
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC86F_DMA_DATA 31:0
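/*
 * Minimal sketch (illustrative, not part of this header): packing a
 * pushbuffer header for an incrementing method run using the INCR fields
 * above. Treating the ADDRESS field as the dword offset (method >> 2) is
 * an assumption based on the legacy NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
 * encoding.
 */
#if 0
static inline NvU32 nvc86f_incr_method_header(NvU32 method, NvU32 subch, NvU32 count)
{
    return ((NvU32)NVC86F_DMA_INCR_OPCODE_VALUE << 29) |   /* OPCODE     31:29 */
           ((count & 0x1fff)                    << 16) |   /* COUNT      28:16 */
           ((subch & 0x7)                       << 13) |   /* SUBCHANNEL 15:13 */
           ((method >> 2) & 0xfff);                        /* ADDRESS    11:0  */
}
#endif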
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc86f_h_ */
#endif // __gh100_clc86f_h__

View File

@@ -1,160 +1,46 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
#ifndef __gh100_clc8b5_h__
#define __gh100_clc8b5_h__
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc8b5_h_
#define _clc8b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define HOPPER_DMA_COPY_A (0x0000C8B5)
typedef volatile struct _clc8b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x6F];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved10[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved11[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved12[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved13[0x3BA];
} hopper_dma_copy_aControlPio;
#define NVC8B5_NOP (0x00000100)
#define NVC8B5_NOP_PARAMETER 31:0
#define NVC8B5_PM_TRIGGER (0x00000140)
#define NVC8B5_PM_TRIGGER_V 31:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define HOPPER_DMA_COPY_A (0x0000C8B5)
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -167,80 +53,41 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
@@ -249,41 +96,11 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC8B5_PITCH_IN (0x00000410)
#define NVC8B5_PITCH_IN_VALUE 31:0
#define NVC8B5_PITCH_OUT (0x00000414)
#define NVC8B5_PITCH_OUT_VALUE 31:0
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC8B5_LINE_COUNT (0x0000041C)
#define NVC8B5_LINE_COUNT_VALUE 31:0
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
@@ -299,132 +116,18 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_DST_WIDTH (0x00000710)
#define NVC8B5_SET_DST_WIDTH_V 31:0
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
#define NVC8B5_SET_DST_HEIGHT_V 31:0
#define NVC8B5_SET_DST_DEPTH (0x00000718)
#define NVC8B5_SET_DST_DEPTH_V 31:0
#define NVC8B5_SET_DST_LAYER (0x0000071C)
#define NVC8B5_SET_DST_LAYER_V 31:0
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
#define NVC8B5_SET_DST_ORIGIN_X 15:0
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
#define NVC8B5_SET_SRC_WIDTH_V 31:0
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
#define NVC8B5_SET_SRC_DEPTH_V 31:0
#define NVC8B5_SET_SRC_LAYER (0x00000738)
#define NVC8B5_SET_SRC_LAYER_V 31:0
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC8B5_PM_TRIGGER_END (0x00001114)
#define NVC8B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc8b5_h
#endif // __gh100_clc8b5_h__
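/*
 * Editor's note (not part of the class header): the HI:LO tokens above, e.g.
 * NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0, are meant to be consumed by DRF-style
 * field macros (the SDK ships helpers of this shape, e.g. DRF_NUM()/DRF_DEF()
 * in nvmisc.h). A minimal, self-contained sketch of that idiom in plain C,
 * with SKETCH_* names invented purely for illustration:
 */
#define SKETCH_FIELD_LO(f)     ((0 ? f) % 32)   /* "0 ? HI:LO" evaluates to LO */
#define SKETCH_FIELD_HI(f)     ((1 ? f) % 32)   /* "1 ? HI:LO" evaluates to HI */
#define SKETCH_FIELD_MASK(f)   (0xFFFFFFFFU >> (31 - SKETCH_FIELD_HI(f) + SKETCH_FIELD_LO(f)))
#define SKETCH_FIELD_NUM(f, n) (((NvU32)(n) & SKETCH_FIELD_MASK(f)) << SKETCH_FIELD_LO(f))
/*
 * Example: a SetRemapComponents value that routes source X to destination X
 * using one-byte components:
 *
 *   NvU32 remap = SKETCH_FIELD_NUM(NVC8B5_SET_REMAP_COMPONENTS_DST_X,
 *                                  NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X) |
 *                 SKETCH_FIELD_NUM(NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE,
 *                                  NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE);
 */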

View File

@@ -1,84 +1,42 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc96f_h_
#define _clc96f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class BLACKWELL_CHANNEL_GPFIFO */
/*
* Documentation for BLACKWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Note there is no .mfs file for this class.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gb100_clc96f_h__
#define __gb100_clc96f_h__
#define BLACKWELL_CHANNEL_GPFIFO_A (0x0000C96F)
#define NVC96F_TYPEDEF BLACKWELL_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc96fControl_struct {
NvU32 Ignored00[0x23]; /* 0000-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored01[0x5c];
} Nvc96fControl, BlackwellAControlGPFifo;
/* fields and values */
#define NVC96F_NUMBER_OF_SUBCHANNELS (8)
#define NVC96F_SET_OBJECT (0x00000000)
#define NVC96F_SET_OBJECT_NVCLASS 15:0
#define NVC96F_SET_OBJECT_ENGINE 20:16
#define NVC96F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC96F_NOP (0x00000008)
#define NVC96F_NOP_HANDLE 31:0
#define NVC96F_NON_STALL_INTERRUPT (0x00000020)
#define NVC96F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC96F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC96F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC96F_MEM_OP_A (0x00000028)
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
@@ -86,9 +44,6 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_B (0x0000002c)
#define NVC96F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC96F_MEM_OP_C (0x00000030)
#define NVC96F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
@@ -97,130 +52,38 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC96F_MEM_OP_D (0x00000034)
#define NVC96F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_D_OPERATION 31:27
#define NVC96F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC96F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
#define NVC96F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC96F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC96F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE 0x00000011
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_COH_INVALIDATE 0x00000012
#define NVC96F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC96F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
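/*
 * Editor's note (illustration only, not from the class header): one way to
 * compose a full, non-replayed TLB invalidate from the MEM_OP fields above,
 * using explicit shifts that mirror the documented bit ranges (PDB 0:0,
 * REPLAY 4:2, ACK_TYPE 6:5, OPERATION 31:27). MEM_OP_A/B are assumed zero
 * here (no target address, SYSMEMBAR disabled); the driver builds these words
 * with its own field helpers rather than open-coded shifts.
 */
static inline void nvc96f_mem_op_tlb_invalidate_all(NvU32 *mem_op_c, NvU32 *mem_op_d)
{
    *mem_op_c = (NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL       << 0) |
                (NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE   << 2) |
                (NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE << 5);
    *mem_op_d = (NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE << 27);
}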
#define NVC96F_SEM_ADDR_LO (0x0000005c)
#define NVC96F_SEM_ADDR_LO_OFFSET 31:2
#define NVC96F_SEM_ADDR_HI (0x00000060)
#define NVC96F_SEM_ADDR_HI_OFFSET 24:0
#define NVC96F_SEM_PAYLOAD_LO (0x00000064)
#define NVC96F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC96F_SEM_PAYLOAD_HI (0x00000068)
#define NVC96F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC96F_SEM_EXECUTE (0x0000006c)
#define NVC96F_SEM_EXECUTE_OPERATION 2:0
#define NVC96F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC96F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC96F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK 18:18
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_EN 0x00000001
#define NVC96F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC96F_SEM_EXECUTE_REDUCTION 30:27
#define NVC96F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC96F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC96F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC96F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC96F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC96F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC96F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC96F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
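/*
 * Editor's note (illustration only, not from the class header): method data
 * for a 32-bit semaphore release built from the NVC96F_SEM_* fields above.
 * gpu_va is assumed 4-byte aligned (SEM_ADDR_LO_OFFSET is 31:2); the shifts
 * mirror the documented bit ranges (OPERATION 2:0, RELEASE_WFI 20:20,
 * PAYLOAD_SIZE 24:24).
 */
static inline void nvc96f_sem_release_32(NvU64 gpu_va, NvU32 payload,
                                         NvU32 *addr_lo, NvU32 *addr_hi,
                                         NvU32 *payload_lo, NvU32 *execute)
{
    *addr_lo    = (NvU32)gpu_va;                        /* low word, 4-byte aligned  */
    *addr_hi    = (NvU32)(gpu_va >> 32) & 0x01FFFFFF;   /* SEM_ADDR_HI_OFFSET 24:0   */
    *payload_lo = payload;                              /* SEM_PAYLOAD_LO 31:0       */
    *execute    = (NVC96F_SEM_EXECUTE_OPERATION_RELEASE  << 0)  |
                  (NVC96F_SEM_EXECUTE_RELEASE_WFI_EN     << 20) |
                  (NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT << 24);
}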
#define NVC96F_WFI (0x00000078)
#define NVC96F_WFI_SCOPE 0:0
#define NVC96F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC96F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC96F_WFI_SCOPE_ALL 0x00000001
#define NVC96F_YIELD (0x00000080)
#define NVC96F_YIELD_OP 1:0
#define NVC96F_YIELD_OP_NOP 0x00000000
#define NVC96F_YIELD_OP_TSG 0x00000003
#define NVC96F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC96F_CLEAR_FAULTED_HANDLE 30:0
#define NVC96F_CLEAR_FAULTED_TYPE 31:31
#define NVC96F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC96F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
/* GPFIFO entry format */
#define NVC96F_GP_ENTRY__SIZE 8
@@ -245,85 +108,4 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
/* dma method formats */
#define NVC96F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC96F_DMA_METHOD_ADDRESS 11:0
#define NVC96F_DMA_SUBDEVICE_MASK 15:4
#define NVC96F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC96F_DMA_TERT_OP 17:16
#define NVC96F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC96F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC96F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC96F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC96F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC96F_DMA_METHOD_COUNT_OLD 28:18
#define NVC96F_DMA_METHOD_COUNT 28:16
#define NVC96F_DMA_IMMD_DATA 28:16
#define NVC96F_DMA_SEC_OP 31:29
#define NVC96F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC96F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC96F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC96F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC96F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC96F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC96F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC96F_DMA_INCR_ADDRESS 11:0
#define NVC96F_DMA_INCR_SUBCHANNEL 15:13
#define NVC96F_DMA_INCR_COUNT 28:16
#define NVC96F_DMA_INCR_OPCODE 31:29
#define NVC96F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC96F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC96F_DMA_NONINCR_ADDRESS 11:0
#define NVC96F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC96F_DMA_NONINCR_COUNT 28:16
#define NVC96F_DMA_NONINCR_OPCODE 31:29
#define NVC96F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC96F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC96F_DMA_ONEINCR_ADDRESS 11:0
#define NVC96F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC96F_DMA_ONEINCR_COUNT 28:16
#define NVC96F_DMA_ONEINCR_OPCODE 31:29
#define NVC96F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC96F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC96F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC96F_DMA_IMMD_ADDRESS 11:0
#define NVC96F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC96F_DMA_IMMD_DATA 28:16
#define NVC96F_DMA_IMMD_OPCODE 31:29
#define NVC96F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC96F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC96F_DMA_ENDSEG_OPCODE 31:29
#define NVC96F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC96F_DMA_ADDRESS 12:2
#define NVC96F_DMA_SUBCH 15:13
#define NVC96F_DMA_OPCODE3 17:16
#define NVC96F_DMA_OPCODE3_NONE (0x00000000)
#define NVC96F_DMA_COUNT 28:18
#define NVC96F_DMA_OPCODE 31:29
#define NVC96F_DMA_OPCODE_METHOD (0x00000000)
#define NVC96F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC96F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc96f_h_ */
#endif // __gb100_clc96f_h__
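/*
 * Editor's note (illustration only, not from the class header): packing one
 * incrementing-method header in the NVC96F_DMA_INCR_* format above. The
 * ADDRESS field (11:0) is assumed to carry the method offset in 32-bit words
 * (i.e. the byte offset >> 2, as the legacy 12:2 encoding suggests); COUNT is
 * the number of data dwords that follow the header in the pushbuffer.
 */
static inline NvU32 nvc96f_incr_method_header(NvU32 method_byte_offset,
                                              NvU32 subchannel,
                                              NvU32 dword_count)
{
    return ((method_byte_offset >> 2) & 0xFFF)   |   /* NVC96F_DMA_INCR_ADDRESS 11:0     */
           ((subchannel & 0x7)        << 13)     |   /* NVC96F_DMA_INCR_SUBCHANNEL 15:13 */
           ((dword_count & 0x1FFF)    << 16)     |   /* NVC96F_DMA_INCR_COUNT 28:16      */
           (NVC96F_DMA_INCR_OPCODE_VALUE << 29);     /* NVC96F_DMA_INCR_OPCODE 31:29     */
}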

View File

@@ -1,460 +1,29 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc9b5_h_
#define _clc9b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#ifndef __gb100_clc9b5_h__
#define __gb100_clc9b5_h__
#define BLACKWELL_DMA_COPY_A (0x0000C9B5)
typedef volatile struct _clc9b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x10];
NvV32 SetCompressionParameters; // 0x00000580 - 0x00000583
NvV32 SetDecompressOutLength; // 0x00000584 - 0x00000587
NvV32 SetDecompressOutLengthAddrUpper; // 0x00000588 - 0x0000058B
NvV32 SetDecompressOutLengthAddrLower; // 0x0000058C - 0x0000058F
NvV32 SetDecompressChecksum; // 0x00000590 - 0x00000593
NvV32 Reserved10[0x5A];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved11[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved12[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved13[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved14[0x3BA];
} blackwell_dma_copy_aControlPio;
#define NVC9B5_NOP (0x00000100)
#define NVC9B5_NOP_PARAMETER 31:0
#define NVC9B5_PM_TRIGGER (0x00000140)
#define NVC9B5_PM_TRIGGER_V 31:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define NVC9B5_SET_SEMAPHORE_A (0x00000240)
#define NVC9B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC9B5_SET_SEMAPHORE_B (0x00000244)
#define NVC9B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC9B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC9B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC9B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC9B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC9B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC9B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC9B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC9B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC9B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC9B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC9B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC9B5_LAUNCH_DMA (0x00000300)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE 11:11
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC9B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC9B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC9B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
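/*
 * Editor's note (illustration only, not from the class header): a LAUNCH_DMA
 * value for a flat, single-line, pitch-to-pitch copy between two virtual
 * addresses, built with explicit shifts that mirror the bit ranges above
 * (DATA_TRANSFER_TYPE 1:0, FLUSH_ENABLE 2:2, SRC/DST_MEMORY_LAYOUT 7:7/8:8).
 * SRC_TYPE/DST_TYPE stay VIRTUAL (0) and MULTI_LINE_ENABLE stays FALSE (0).
 * A client would program OffsetIn*, OffsetOut* and LineLengthIn, then write
 * this value to LaunchDma to start the copy.
 */
#define NVC9B5_SKETCH_LAUNCH_FLAT_COPY                                   \
    ((NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED << 0) |         \
     (NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                << 2) |         \
     (NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH          << 7) |         \
     (NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH          << 8))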
#define NVC9B5_OFFSET_IN_UPPER (0x00000400)
#define NVC9B5_OFFSET_IN_UPPER_UPPER 24:0
#define NVC9B5_OFFSET_IN_LOWER (0x00000404)
#define NVC9B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC9B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC9B5_OFFSET_OUT_UPPER_UPPER 24:0
#define NVC9B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC9B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC9B5_PITCH_IN (0x00000410)
#define NVC9B5_PITCH_IN_VALUE 31:0
#define NVC9B5_PITCH_OUT (0x00000414)
#define NVC9B5_PITCH_OUT_VALUE 31:0
#define NVC9B5_LINE_LENGTH_IN (0x00000418)
#define NVC9B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC9B5_LINE_COUNT (0x0000041C)
#define NVC9B5_LINE_COUNT_VALUE 31:0
#define NVC9B5_SET_SECURE_COPY_MODE (0x00000500)
#define NVC9B5_SET_SECURE_COPY_MODE_MODE 0:0
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
#define NVC9B5_SET_DECRYPT_IV0 (0x00000504)
#define NVC9B5_SET_DECRYPT_IV0_VALUE 31:0
#define NVC9B5_SET_DECRYPT_IV1 (0x00000508)
#define NVC9B5_SET_DECRYPT_IV1_VALUE 31:0
#define NVC9B5_SET_DECRYPT_IV2 (0x0000050C)
#define NVC9B5_SET_DECRYPT_IV2_VALUE 31:0
#define NVC9B5_RESERVED_SET_AESCOUNTER (0x00000510)
#define NVC9B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
#define NVC9B5_SET_COMPRESSION_PARAMETERS (0x00000580)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION 0:0
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_DECOMPRESS (0x00000000)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_COMPRESS (0x00000001)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO 3:1
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY (0x00000000)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_DATA_ONLY (0x00000001)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK (0x00000002)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK_CHECKSUM (0x00000003)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_DEFLATE (0x00000004)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY_WITH_LONG_FETCH (0x00000005)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM 29:28
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_NONE (0x00000000)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_ADLER32 (0x00000001)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_CRC32 (0x00000002)
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_SNAPPY_CRC (0x00000003)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH (0x00000584)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_V 31:0
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER (0x00000588)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER_UPPER 24:0
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER (0x0000058C)
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER_LOWER 31:0
#define NVC9B5_SET_DECOMPRESS_CHECKSUM (0x00000590)
#define NVC9B5_SET_DECOMPRESS_CHECKSUM_V 31:0
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
#define NVC9B5_SET_REMAP_CONST_A (0x00000700)
#define NVC9B5_SET_REMAP_CONST_A_V 31:0
#define NVC9B5_SET_REMAP_CONST_B (0x00000704)
#define NVC9B5_SET_REMAP_CONST_B_V 31:0
#define NVC9B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC9B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC9B5_SET_DST_WIDTH (0x00000710)
#define NVC9B5_SET_DST_WIDTH_V 31:0
#define NVC9B5_SET_DST_HEIGHT (0x00000714)
#define NVC9B5_SET_DST_HEIGHT_V 31:0
#define NVC9B5_SET_DST_DEPTH (0x00000718)
#define NVC9B5_SET_DST_DEPTH_V 31:0
#define NVC9B5_SET_DST_LAYER (0x0000071C)
#define NVC9B5_SET_DST_LAYER_V 31:0
#define NVC9B5_SET_DST_ORIGIN (0x00000720)
#define NVC9B5_SET_DST_ORIGIN_X 15:0
#define NVC9B5_SET_DST_ORIGIN_Y 31:16
#define NVC9B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC9B5_SET_SRC_WIDTH (0x0000072C)
#define NVC9B5_SET_SRC_WIDTH_V 31:0
#define NVC9B5_SET_SRC_HEIGHT (0x00000730)
#define NVC9B5_SET_SRC_HEIGHT_V 31:0
#define NVC9B5_SET_SRC_DEPTH (0x00000734)
#define NVC9B5_SET_SRC_DEPTH_V 31:0
#define NVC9B5_SET_SRC_LAYER (0x00000738)
#define NVC9B5_SET_SRC_LAYER_V 31:0
#define NVC9B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC9B5_SET_SRC_ORIGIN_X 15:0
#define NVC9B5_SET_SRC_ORIGIN_Y 31:16
#define NVC9B5_SRC_ORIGIN_X (0x00000744)
#define NVC9B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC9B5_SRC_ORIGIN_Y (0x00000748)
#define NVC9B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC9B5_DST_ORIGIN_X (0x0000074C)
#define NVC9B5_DST_ORIGIN_X_VALUE 31:0
#define NVC9B5_DST_ORIGIN_Y (0x00000750)
#define NVC9B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC9B5_PM_TRIGGER_END (0x00001114)
#define NVC9B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc9b5_h
#endif // __gb100_clc9b5_h__

View File

@@ -151,6 +151,7 @@ typedef volatile struct _clcba2_tag0 {
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED (0x0000001d)
#define NVCBA2_ERROR_SIZE_ZERO (0x0000001e)
#ifdef __cplusplus
}; /* extern "C" */

View File

@@ -43,4 +43,7 @@
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000 (0x00000001)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B (0x0000000B)
#endif /* _ctrl2080mc_h_ */

View File

@@ -21,6 +21,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_fd_type.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_processors.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
@@ -59,7 +60,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
@@ -96,7 +96,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_heuristics.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_thrashing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
@@ -128,3 +127,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_file.c

View File

@@ -50,7 +50,6 @@ NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
@@ -63,6 +62,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += page_pgmap
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
@@ -81,3 +81,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___iowrite64_lo_hi
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_make_device_exclusive

View File

@@ -33,10 +33,12 @@
#include "uvm_va_block.h"
#include "uvm_tools.h"
#include "uvm_common.h"
#include "uvm_fd_type.h"
#include "uvm_linux_ioctl.h"
#include "uvm_hmm.h"
#include "uvm_mem.h"
#include "uvm_kvmalloc.h"
#include "uvm_test_file.h"
#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
@@ -49,55 +51,9 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
return (filp != NULL) && (filp->f_op == &uvm_fops);
}
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp)
{
unsigned long uptr;
uvm_fd_type_t type;
void *ptr;
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
switch (type) {
case UVM_FD_UNINITIALIZED:
case UVM_FD_INITIALIZING:
UVM_ASSERT(!ptr);
break;
case UVM_FD_VA_SPACE:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
break;
case UVM_FD_MM:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
break;
default:
UVM_ASSERT(0);
}
if (ptr_val)
*ptr_val = ptr;
return type;
}
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
void *ptr;
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
if (uvm_fd_type(filp, &ptr) == type)
return ptr;
else
return NULL;
return uvm_file_is_nvidia_uvm(filp) && uvm_fd_type(filp, NULL) == UVM_FD_VA_SPACE;
}
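/*
 * Editor's note (not driver code): the uvm_fd_type() logic removed above, now
 * factored out into uvm_fd_type.c, is a tagged-pointer scheme: filp->private_data
 * holds a pointer whose low UVM_FD_TYPE_BITS encode the fd type, which works
 * because the pointed-to objects are aligned to at least 1 << UVM_FD_TYPE_BITS
 * (the BUILD_BUG_ONs above check exactly that). A standalone C11 sketch of the
 * idea follows; all names and the bit count are illustrative, not the driver's.
 */
#include <stdatomic.h>
#include <stdint.h>

#define SKETCH_FD_TYPE_BITS 3u
#define SKETCH_FD_TYPE_MASK (((uintptr_t)1 << SKETCH_FD_TYPE_BITS) - 1)

static inline unsigned sketch_fd_type(_Atomic uintptr_t *slot, void **ptr_out)
{
    uintptr_t v = atomic_load_explicit(slot, memory_order_acquire);
    if (ptr_out)
        *ptr_out = (void *)(v & ~SKETCH_FD_TYPE_MASK);   /* aligned payload pointer */
    return (unsigned)(v & SKETCH_FD_TYPE_MASK);          /* low bits carry the type */
}

static inline void sketch_fd_type_set(_Atomic uintptr_t *slot, unsigned type, void *ptr)
{
    atomic_store_explicit(slot, (uintptr_t)ptr | type, memory_order_release);
}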
static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
@@ -105,7 +61,6 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
uvm_va_space_t *va_space;
uvm_va_space_mm_t *va_space_mm;
struct file *uvm_file;
uvm_fd_type_t old_fd_type;
struct mm_struct *mm;
NV_STATUS status;
@@ -127,14 +82,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
goto err;
}
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type != UVM_FD_UNINITIALIZED) {
status = NV_ERR_IN_USE;
status = uvm_fd_type_init(filp);
if (status != NV_OK)
goto err;
}
va_space_mm = &va_space->va_space_mm;
uvm_spin_lock(&va_space_mm->lock);
@@ -173,13 +123,13 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
break;
}
uvm_spin_unlock(&va_space_mm->lock);
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
uvm_fd_type_set(filp, UVM_FD_MM, uvm_file);
return NV_OK;
err_release_unlock:
uvm_spin_unlock(&va_space_mm->lock);
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
err:
if (uvm_file)
@@ -240,7 +190,7 @@ static void uvm_release_deferred(void *data)
// Since this function is only scheduled to run when uvm_release() fails
// to trylock-acquire the pm.lock, the following acquisition attempt
// is expected to block this thread, and cause it to remain blocked until
// uvm_resume() releases the lock. As a result, the deferred release
// kthread queue may stall for long periods of time.
uvm_down_read(&g_uvm_global.pm.lock);
@@ -249,12 +199,43 @@ static void uvm_release_deferred(void *data)
uvm_up_read(&g_uvm_global.pm.lock);
}
static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static void uvm_release_va_space(struct file *filp, uvm_va_space_t *va_space)
{
int ret;
filp->private_data = NULL;
filp->f_mapping = NULL;
// Because the kernel discards the status code returned from this release
// callback, early exit in case of a pm.lock acquisition failure is not
// an option. Instead, the teardown work normally performed synchronously
// needs to be scheduled to run after uvm_resume() releases the lock.
if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
uvm_va_space_destroy(va_space);
uvm_up_read(&g_uvm_global.pm.lock);
}
else {
// Remove references to this inode from the address_space. This isn't
// strictly necessary, as any CPU mappings of this file have already
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in
// which case the references are rendered stale.
address_space_init_once(va_space->mapping);
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
UVM_ASSERT(ret != 0);
}
}
static void uvm_release_mm(struct file *filp, struct file *uvm_file)
{
uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
struct mm_struct *mm = va_space_mm->mm;
uvm_kvfree(filp->f_mapping);
if (uvm_va_space_mm_enabled(va_space)) {
uvm_va_space_mm_unregister(va_space);
@@ -269,46 +250,27 @@ static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static int uvm_release(struct inode *inode, struct file *filp)
{
void *ptr;
uvm_va_space_t *va_space;
uvm_fd_type_t fd_type;
int ret;
uvm_fd_type_t fd_type = uvm_fd_type(filp, &ptr);
fd_type = uvm_fd_type(filp, &ptr);
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
if (fd_type == UVM_FD_UNINITIALIZED) {
uvm_kvfree(filp->f_mapping);
return 0;
}
else if (fd_type == UVM_FD_MM) {
uvm_kvfree(filp->f_mapping);
uvm_mm_release(filp, (struct file *)ptr);
return 0;
}
switch (fd_type) {
case UVM_FD_UNINITIALIZED:
uvm_kvfree(filp->f_mapping);
break;
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
va_space = (uvm_va_space_t *)ptr;
filp->private_data = NULL;
filp->f_mapping = NULL;
case UVM_FD_VA_SPACE:
uvm_release_va_space(filp, (uvm_va_space_t *)ptr);
break;
// Because the kernel discards the status code returned from this release
// callback, early exit in case of a pm.lock acquisition failure is not
// an option. Instead, the teardown work normally performed synchronously
// needs to be scheduled to run after uvm_resume() releases the lock.
if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
uvm_va_space_destroy(va_space);
uvm_up_read(&g_uvm_global.pm.lock);
}
else {
// Remove references to this inode from the address_space. This isn't
// strictly necessary, as any CPU mappings of this file have already
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in
// which case the references are rendered stale.
address_space_init_once(va_space->mapping);
case UVM_FD_MM:
uvm_release_mm(filp, (struct file *)ptr);
break;
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
UVM_ASSERT(ret != 0);
case UVM_FD_TEST:
uvm_test_file_release(filp, (uvm_test_file_t *)ptr);
break;
default:
UVM_ASSERT_MSG(0, "Unexpected fd type: %d\n", fd_type);
}
return 0;
@@ -829,6 +791,7 @@ static struct vm_operations_struct uvm_vm_ops_device_p2p =
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
void *fd_type_ptr;
uvm_va_space_t *va_space;
NV_STATUS status = uvm_global_get_status();
int ret = 0;
@@ -837,9 +800,17 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
if (status != NV_OK)
return -nv_status_to_errno(status);
va_space = uvm_fd_va_space(filp);
if (!va_space)
return -EBADFD;
switch (uvm_fd_type(filp, &fd_type_ptr)) {
case UVM_FD_VA_SPACE:
va_space = (uvm_va_space_t *)fd_type_ptr;
break;
case UVM_FD_TEST:
return uvm_test_file_mmap((uvm_test_file_t *)fd_type_ptr, vma);
default:
return -EBADFD;
}
// When the VA space is associated with an mm, all vmas under the VA space
// must come from that mm.
@@ -867,8 +838,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
}
// If the PM lock cannot be acquired, disable the VMA and report success
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// safely handle MAP_FIXED, which needs to complete atomically to prevent
// the loss of the virtual address range.
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
@@ -999,33 +970,40 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
// attempt to be made. This is safe because other threads will have only had
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
// case.
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type == UVM_FD_UNINITIALIZED) {
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
if (status != NV_OK) {
atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
return status;
}
old_fd_type = uvm_fd_type_init_cas(filp);
switch (old_fd_type) {
case UVM_FD_UNINITIALIZED:
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
if (status != NV_OK) {
uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
return status;
}
atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
}
else if (old_fd_type == UVM_FD_VA_SPACE) {
va_space = uvm_va_space_get(filp);
uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);
break;
if (params->flags != va_space->initialization_flags)
case UVM_FD_VA_SPACE:
va_space = uvm_va_space_get(filp);
if (params->flags != va_space->initialization_flags)
status = NV_ERR_INVALID_ARGUMENT;
else
status = NV_OK;
break;
case UVM_FD_MM:
case UVM_FD_TEST:
status = NV_ERR_INVALID_ARGUMENT;
else
status = NV_OK;
}
else if (old_fd_type == UVM_FD_MM) {
status = NV_ERR_INVALID_ARGUMENT;
}
else {
UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
status = NV_ERR_BUSY_RETRY;
break;
case UVM_FD_INITIALIZING:
status = NV_ERR_BUSY_RETRY;
break;
default:
UVM_ASSERT(0);
status = NV_ERR_INVALID_STATE; // Quiet compiler warnings
break;
}
return status;
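The initialization sequence above leans on pointer tagging in filp->private_data: the low bits carry the fd type (UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING, UVM_FD_VA_SPACE, ...), the remaining bits carry the payload pointer, and a cmpxchg from UNINITIALIZED to INITIALIZING serializes concurrent UVM_INITIALIZE calls. A minimal sketch of that scheme follows, assuming a 3-bit type mask for illustration; the helper names mirror, but are not, the uvm_fd_type_init_cas()/uvm_fd_type_set() helpers used in the hunk.
// Sketch: tag the low bits of private_data with an fd type. This works only
// if the stored pointers are sufficiently aligned (assumed >= 8 bytes here).
#define FD_TYPE_BITS 3
#define FD_TYPE_MASK ((1UL << FD_TYPE_BITS) - 1)
static void fd_type_set(struct file *filp, unsigned long type, void *ptr)
{
    // Release semantics: a reader that observes the type also observes the
    // fully constructed object behind the pointer.
    atomic_long_set_release((atomic_long_t *)&filp->private_data,
                            (long)((unsigned long)ptr | type));
}
static unsigned long fd_type_init_cas(struct file *filp)
{
    // Only one thread wins the UNINITIALIZED -> INITIALIZING transition;
    // everyone else sees the previous type and reacts accordingly.
    long old = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                   UVM_FD_UNINITIALIZED,
                                   UVM_FD_INITIALIZING);
    return (unsigned long)old & FD_TYPE_MASK;
}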
@@ -1233,19 +1211,8 @@ static int uvm_init(void)
goto error;
}
pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));
if (uvm_enable_builtin_tests)
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
// Until that time, however, we need to avoid advertising UVM's ability to
// enable HMM functionality.
if (uvm_hmm_is_enabled_system_wide())
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");
return 0;
@@ -1274,8 +1241,6 @@ static void uvm_exit(void)
uvm_global_exit();
uvm_test_unload_state_exit();
pr_info("Unloaded the UVM driver.\n");
}
static void __exit uvm_exit_entry(void)

View File

@@ -1430,9 +1430,9 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
// UvmMigrate
//
// Migrates the backing of a given virtual address range to the specified
// destination processor. If any page in the VA range is unpopulated, it is
// populated at the destination processor. The migrated pages in the VA range
// are also mapped on the destination processor.
// destination processor's nearest memory. If any page in the VA range is
// unpopulated, it is populated at the destination processor. The migrated pages
// in the VA range are also mapped on the destination processor.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
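The alignment precondition called out above is a caller responsibility. A caller-side sanity check might look like the sketch below; the helper name and parameter choices are illustrative, and the actual UvmMigrate() prototype should be taken from uvm.h rather than inferred from this sketch.
// Hypothetical pre-call validation mirroring the documented requirement.
static bool range_is_migratable(uintptr_t base, uint64_t length, uint64_t cpu_page_size)
{
    if (length == 0)
        return false;
    // Both the start address and the length must be multiples of the
    // smallest page size supported by the CPU.
    return (base % cpu_page_size) == 0 && (length % cpu_page_size) == 0;
}
If the check passes, the UvmMigrate() call (declared in uvm.h) can then be issued on [base, base + length).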
@@ -2207,9 +2207,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input virtual range corresponds to
// system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
// reports that pageable memory access is supported, the behavior described
// below does not take effect, and read duplication will not be enabled for
// the input range.
// reports that pageable memory access is supported, or if a memoryless
// processor is present, the behavior described below does not take effect, and
// read duplication will not be enabled for the input range.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU.
@@ -2330,7 +2330,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmSetPreferredLocation
//
// Sets the preferred location for the given virtual address range to be the
// specified processor's memory.
// specified processor's nearest memory.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ada covers 128 TB and that's the minimum size
@@ -80,10 +78,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
@@ -84,10 +82,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -189,7 +189,7 @@ static bool uvm_api_range_invalid(NvU64 base, NvU64 length)
}
// Some APIs can only enforce 4K alignment as it's the smallest GPU page size
// even when the smallest host page is larger (e.g. 64K on ppc64le).
// even when the smallest host page is larger.
static bool uvm_api_range_invalid_4k(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_4K);

View File

@@ -42,26 +42,11 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
uvm_va_space_mm_enabled_system();
}
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_init_va_space(va_space);
}
NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_IBM_SUPPORTED()) {
// uvm_ibm_add_gpu() needs to be called even if ATS is disabled since it
// sets parent_gpu->npu. Not setting parent_gpu->npu will result in
// incorrect NVLink addresses. See dma_addr_to_gpu_addr().
return uvm_ats_ibm_add_gpu(parent_gpu);
}
else if (UVM_ATS_SVA_SUPPORTED()) {
if (g_uvm_global.ats.enabled)
return uvm_ats_sva_add_gpu(parent_gpu);
if (g_uvm_global.ats.enabled) {
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
return uvm_ats_sva_add_gpu(parent_gpu);
}
return NV_OK;
@@ -69,38 +54,25 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_IBM_SUPPORTED()) {
// uvm_ibm_remove_gpu() needs to be called even if ATS is disabled since
// uvm_ibm_add_gpu() is called even in that case and
// uvm_ibm_remove_gpu() needs to undo the work done by
// uvm_ats_add_gpu() (gpu retained_count etc.).
uvm_ats_ibm_remove_gpu(parent_gpu);
}
else if (UVM_ATS_SVA_SUPPORTED()) {
if (g_uvm_global.ats.enabled)
uvm_ats_sva_remove_gpu(parent_gpu);
if (g_uvm_global.ats.enabled) {
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
uvm_ats_sva_remove_gpu(parent_gpu);
}
}
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
NV_STATUS status = NV_OK;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return status;
return NV_OK;
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
uvm_assert_lockable_order(UVM_LOCK_ORDER_MMAP_LOCK);
uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_SPACE);
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
status = uvm_ats_sva_bind_gpu(gpu_va_space);
return status;
return uvm_ats_sva_bind_gpu(gpu_va_space);
}
void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -110,10 +82,9 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
if (!gpu_va_space->ats.enabled)
return;
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unbind_gpu(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
uvm_ats_sva_unbind_gpu(gpu_va_space);
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
uvm_ats_sva_unbind_gpu(gpu_va_space);
}
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -127,6 +98,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (!gpu_va_space->ats.enabled)
return status;
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
va_space = gpu_va_space->va_space;
UVM_ASSERT(va_space);
@@ -138,10 +111,7 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (uvm_processor_mask_test(&va_space->ats.registered_gpu_va_spaces, gpu_id))
return NV_ERR_INVALID_DEVICE;
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
if (status == NV_OK)
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@@ -159,25 +129,14 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (!gpu_va_space->ats.enabled)
return;
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
va_space = gpu_va_space->va_space;
gpu_id = gpu_va_space->gpu->id;
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
else if (UVM_ATS_SVA_SUPPORTED())
uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
uvm_va_space_down_write(va_space);
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
uvm_va_space_up_write(va_space);
}
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
// We can only reach here from the mmu_notifier callbacks and these callbacks
// wouldn't have been registered if ATS wasn't enabled.
UVM_ASSERT(g_uvm_global.ats.enabled);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_invalidate(va_space, start, end);
}

View File

@@ -26,12 +26,11 @@
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
#define UVM_ATS_SUPPORTED() UVM_ATS_SVA_SUPPORTED()
typedef struct
{
@@ -43,12 +42,7 @@ typedef struct
// being called in ats_compute_residency_mask().
uvm_rw_semaphore_t lock;
union
{
uvm_ibm_va_space_t ibm;
uvm_sva_va_space_t sva;
};
uvm_sva_va_space_t sva;
} uvm_ats_va_space_t;
typedef struct
@@ -61,12 +55,7 @@ typedef struct
NvU32 pasid;
union
{
uvm_ibm_gpu_va_space_t ibm;
uvm_sva_gpu_va_space_t sva;
};
uvm_sva_gpu_va_space_t sva;
} uvm_ats_gpu_va_space_t;
// Initializes driver-wide ATS state
@@ -74,11 +63,6 @@ typedef struct
// LOCKING: None
void uvm_ats_init(const UvmPlatformInfo *platform_info);
// Initializes ATS specific GPU state
//
// LOCKING: None
void uvm_ats_init_va_space(uvm_va_space_t *va_space);
// Enables ATS feature on the GPU.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
@@ -115,8 +99,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
//
// LOCKING: The VA space lock must be held in write mode.
// mm has to be retained prior to calling this function.
// current->mm->mmap_lock must be held in write mode iff
// UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Disables ATS access for the gpu_va_space. Prior to calling this function,
@@ -124,19 +106,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and will acquire the VA space
// lock, so neither lock must be held.
// LOCKING: This function will acquire the VA space lock, so it must not be
// held.
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
// interrupts disabled.
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#endif // __UVM_ATS_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Copyright (c) 2024-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -58,37 +58,6 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
UVM_POPULATE_PERMISSIONS_INHERIT;
// Request uvm_migrate_pageable() to touch the corresponding page after
// population.
// Under virtualization ATS provides two translations:
// 1) guest virtual -> guest physical
// 2) guest physical -> host physical
//
// The overall ATS translation will fault if either of those translations is
// invalid. The pin_user_pages() call within uvm_migrate_pageable() call
// below handles translation #1, but not #2. We don't know if we're running
// as a guest, but in case we are we can force that translation to be valid
// by touching the guest physical address from the CPU. If the translation
// is not valid then the access will cause a hypervisor fault. Note that
// dma_map_page() can't establish mappings used by GPU ATS SVA translations.
// GPU accesses to host physical addresses obtained as a result of the
// address translation request uses the CPU address space instead of the
// IOMMU address space since the translated host physical address isn't
// necessarily an IOMMU address. The only way to establish guest physical to
// host physical mapping in the CPU address space is to touch the page from
// the CPU.
//
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
// VM_WRITE, meaning that the mappings are all granted write access on any
// fault and that the kernel will never revoke them.
// drivers/vfio/pci/vfio_pci_nvlink2.c enforces this. Thus we can assume
// that a read fault is always sufficient to also enable write access on the
// guest translation.
uvm_migrate_args_t uvm_migrate_args =
{
@@ -98,8 +67,8 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = populate_permissions,
.touch = is_fault_service_type,
.populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
.populate_flags = UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK,
.skip_mapped = is_fault_service_type,
.populate_on_cpu_alloc_failures = is_fault_service_type,
.populate_on_migrate_vma_failures = is_fault_service_type,
@@ -115,6 +84,13 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
.fail_on_unresolved_sto_errors = !is_fault_service_type || is_prefetch_faults,
};
if (is_fault_service_type) {
uvm_migrate_args.populate_permissions = (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY);
// If we're faulting, let the GPU access special vmas
uvm_migrate_args.populate_flags |= UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL;
}
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
// We are trying to use migrate_vma API in the kernel (if it exists) to
@@ -139,9 +115,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate;
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
@@ -533,8 +509,20 @@ static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
access_type,
UVM_ATS_SERVICE_TYPE_FAULTS,
ats_context);
if (status != NV_OK)
if (status != NV_OK) {
// This condition can occur if we unexpectedly fault on a vma that
// doesn't support faulting (or at least doesn't support
// pin_user_pages). This may be an incorrect mapping setup from the
// vma's owning driver, a hardware bug, or just that the owning driver
// didn't expect a device fault. Either way, we don't want to consider
// this a global error so don't propagate it, but also don't indicate
// that the faults were serviced. That way the caller knows to cancel
// them precisely.
if (status == NV_ERR_INVALID_ADDRESS)
return NV_OK;
return status;
}
uvm_page_mask_region_fill(faults_serviced_mask, region);
@@ -689,12 +677,14 @@ bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_v
if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
return true;
prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
prev = uvm_va_range_gmmu_mappable_prev(next);
}
else {
// No VA range exists after address, so check the last VA range in the
// tree.
prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
while (prev && !uvm_va_range_is_gmmu_mappable(prev))
prev = uvm_va_range_gmmu_mappable_prev(prev);
}
return prev && (prev->node.end >= gmmu_region_base);
@@ -767,6 +757,20 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
&ats_context->access_counters.accessed_mask,
&ats_context->prefetch_state.residency_mask);
// Pretend that pages that are already resident at the destination GPU were
// migrated now. This makes sure that the access counter is cleared even if
// the accessed pages were already resident on the target.
// TODO: Bug 5296998: [uvm][ats] Not clearing stale access counter
// notifications can lead to missed migrations
// The same problem of stale notifications exists for migrations to
// locations other than local vidmem. However, stale notifications for data
// migrated to another remote location are identical to those triggered
// by accessing memory that cannot or should not be migrated.
if (uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
uvm_page_mask_copy(&ats_context->access_counters.migrated_mask,
&ats_context->prefetch_state.residency_mask);
}
for_each_va_block_subregion_in_mask(subregion, &ats_context->access_counters.accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
@@ -779,7 +783,7 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
// clear access counters if pages were migrated or migration needs to
// Clear access counters if pages were migrated or migration needs to
// be retried
if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
uvm_page_mask_region_fill(migrated_mask, subregion);

View File

@@ -1,715 +0,0 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_lock.h"
#include "uvm_kvmalloc.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include "uvm_ats_ibm.h"
#include "uvm_common.h"
#include <linux/pci.h>
#if UVM_IBM_NPU_SUPPORTED()
#include <linux/of.h>
#include <linux/sizes.h>
#include <asm/pci-bridge.h>
#include <asm/io.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#define NPU_ATSD_REG_MAP_SIZE 32
// There are three 8-byte registers in each ATSD mapping:
#define NPU_ATSD_REG_LAUNCH 0
#define NPU_ATSD_REG_AVA 1
#define NPU_ATSD_REG_STAT 2
// Fields within the NPU_ATSD_REG_LAUNCH register:
// "PRS" (process-scoped) bit. 1 means to limit invalidates to the specified
// PASID.
#define NPU_ATSD_REG_LAUNCH_PASID_ENABLE 13
// "PID" field. This specifies the PASID target of the invalidate.
#define NPU_ATSD_REG_LAUNCH_PASID_VAL 38
// "IS" bit. 0 means the specified virtual address range will be invalidated. 1
// means all entries will be invalidated.
#define NPU_ATSD_REG_LAUNCH_INVAL_ALL 12
// "AP" field. This encodes the size of a range-based invalidate.
#define NPU_ATSD_REG_LAUNCH_INVAL_SIZE 17
// "No flush" bit. 0 will trigger a flush (membar) from the GPU following the
// invalidate, 1 will not.
#define NPU_ATSD_REG_LAUNCH_FLUSH_DISABLE 39
// Helper to iterate over the active NPUs in the given VA space (all NPUs with
// GPUs that have GPU VA spaces registered in this VA space).
#define for_each_npu_index_in_va_space(npu_index, va_space) \
for (({uvm_assert_rwlock_locked(&(va_space)->ats.ibm.rwlock); \
(npu_index) = find_first_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS);}); \
(npu_index) < NV_MAX_NPUS; \
(npu_index) = find_next_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS, (npu_index) + 1))
// An invalidate requires operating on one set of registers in each NPU. This
// struct tracks which register set (id) is in use per NPU for a given
// operation.
typedef struct
{
NvU8 ids[NV_MAX_NPUS];
} uvm_atsd_regs_t;
// Get the index of the input npu pointer within UVM's global npus array
static size_t uvm_ibm_npu_index(uvm_ibm_npu_t *npu)
{
size_t npu_index = npu - &g_uvm_global.npus[0];
UVM_ASSERT(npu_index < ARRAY_SIZE(g_uvm_global.npus));
return npu_index;
}
// Find an existing NPU matching pci_domain, or return an empty NPU slot if none
// is found. Returns NULL if no slots are available.
static uvm_ibm_npu_t *uvm_ibm_npu_find(int pci_domain)
{
size_t i;
uvm_ibm_npu_t *npu, *first_free = NULL;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for (i = 0; i < ARRAY_SIZE(g_uvm_global.npus); i++) {
npu = &g_uvm_global.npus[i];
if (npu->num_retained_gpus == 0) {
if (!first_free)
first_free = npu;
}
else if (npu->pci_domain == pci_domain) {
return npu;
}
}
return first_free;
}
static void uvm_ibm_npu_destroy(uvm_ibm_npu_t *npu)
{
size_t i;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(npu->num_retained_gpus == 0);
UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));
for (i = 0; i < npu->atsd_regs.count; i++) {
UVM_ASSERT(npu->atsd_regs.io_addrs[i]);
iounmap(npu->atsd_regs.io_addrs[i]);
}
memset(npu, 0, sizeof(*npu));
}
static NV_STATUS uvm_ibm_npu_init(uvm_ibm_npu_t *npu, struct pci_dev *npu_dev)
{
struct pci_controller *hose;
size_t i, reg_count, reg_size = sizeof(npu->atsd_regs.io_addrs[0]);
int ret;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(npu->num_retained_gpus == 0);
UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));
npu->pci_domain = pci_domain_nr(npu_dev->bus);
if (!UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
return NV_OK;
hose = pci_bus_to_host(npu_dev->bus);
ret = of_property_count_elems_of_size(hose->dn, "ibm,mmio-atsd", reg_size);
if (ret < 0) {
UVM_ERR_PRINT("Failed to query NPU %d ATSD register count: %d\n", npu->pci_domain, ret);
return errno_to_nv_status(ret);
}
// For ATS to be enabled globally, we must have NPU ATSD registers
reg_count = ret;
if (reg_count == 0 || reg_count > UVM_MAX_ATSD_REGS) {
UVM_ERR_PRINT("NPU %d has invalid ATSD register count: %zu\n", npu->pci_domain, reg_count);
return NV_ERR_INVALID_STATE;
}
// Map the ATSD registers
for (i = 0; i < reg_count; i++) {
u64 phys_addr;
__be64 __iomem *io_addr;
ret = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", i, &phys_addr);
UVM_ASSERT(ret == 0);
io_addr = ioremap(phys_addr, NPU_ATSD_REG_MAP_SIZE);
if (!io_addr) {
uvm_ibm_npu_destroy(npu);
return NV_ERR_NO_MEMORY;
}
npu->atsd_regs.io_addrs[npu->atsd_regs.count++] = io_addr;
}
return NV_OK;
}
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
struct pci_dev *npu_dev = pnv_pci_get_npu_dev(parent_gpu->pci_dev, 0);
uvm_ibm_npu_t *npu;
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (!npu_dev)
return NV_OK;
npu = uvm_ibm_npu_find(pci_domain_nr(npu_dev->bus));
if (!npu) {
// If this happens then we can't support the system configuration until
// NV_MAX_NPUS is updated. Return the same error as when the number of
// GPUs exceeds UVM_MAX_GPUS.
UVM_ERR_PRINT("No more NPU slots available, update NV_MAX_NPUS\n");
return NV_ERR_INSUFFICIENT_RESOURCES;
}
if (npu->num_retained_gpus == 0) {
status = uvm_ibm_npu_init(npu, npu_dev);
if (status != NV_OK)
return status;
}
// This npu field could be read concurrently by a thread in the ATSD
// invalidate path. We don't need to provide ordering with those threads
// because those invalidates won't apply to the GPU being added until a GPU
// VA space on this GPU is registered.
npu->atsd_regs.num_membars = max(npu->atsd_regs.num_membars, parent_gpu->num_hshub_tlb_invalidate_membars);
parent_gpu->npu = npu;
++npu->num_retained_gpus;
return NV_OK;
}
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
uvm_ibm_npu_t *npu = parent_gpu->npu;
uvm_parent_gpu_t *other_parent_gpu;
NvU32 num_membars_new = 0;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (!npu)
return;
UVM_ASSERT(npu->num_retained_gpus > 0);
if (--npu->num_retained_gpus == 0) {
uvm_ibm_npu_destroy(npu);
}
else {
// Re-calculate the membar count
for_each_parent_gpu(other_parent_gpu) {
// The current GPU being removed should've already been removed from
// the global list.
UVM_ASSERT(other_parent_gpu != parent_gpu);
if (other_parent_gpu->npu == npu)
num_membars_new = max(num_membars_new, other_parent_gpu->num_hshub_tlb_invalidate_membars);
}
UVM_ASSERT(num_membars_new > 0);
npu->atsd_regs.num_membars = num_membars_new;
}
}
#if UVM_ATS_IBM_SUPPORTED()
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
uvm_ibm_va_space_t *ibm_va_space;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
uvm_rwlock_irqsave_init(&ibm_va_space->rwlock, UVM_LOCK_ORDER_LEAF);
}
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static void npu_release_dummy(struct npu_context *npu_context, void *va_mm)
{
// See the comment on the call to pnv_npu2_init_context()
}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
struct npu_context *npu_context;
// pnv_npu2_init_context() registers current->mm with
// mmu_notifier_register(). We need that to match the mm we passed to our
// own mmu_notifier_register() for this VA space.
if (current->mm != va_space->va_space_mm.mm)
return NV_ERR_NOT_SUPPORTED;
uvm_assert_mmap_lock_locked_write(current->mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// pnv_npu2_init_context() doesn't handle being called multiple times for
// the same GPU under the same mm, which could happen if multiple VA spaces
// are created in this process. To handle that we pass the VA space pointer
// as the callback parameter: the callback values are shared by all devices
// under this mm, so pnv_npu2_init_context() enforces that the values match
// the ones already registered to the mm.
//
// Otherwise we don't use the callback, since we have our own callback
// registered under the va_space_mm that will be called at the same point
// (mmu_notifier release).
npu_context = pnv_npu2_init_context(gpu_va_space->gpu->parent->pci_dev,
(MSR_DR | MSR_PR | MSR_HV),
npu_release_dummy,
va_space);
if (IS_ERR(npu_context)) {
int err = PTR_ERR(npu_context);
// We'll get -EINVAL if the callback value (va_space) differs from the
// one already registered to the npu_context associated with this mm.
// That can only happen when multiple VA spaces attempt registration
// within the same process, which is disallowed and should return
// NV_ERR_NOT_SUPPORTED.
if (err == -EINVAL)
return NV_ERR_NOT_SUPPORTED;
return errno_to_nv_status(err);
}
ibm_gpu_va_space->npu_context = npu_context;
return NV_OK;
}
static void uvm_ats_ibm_unregister_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_gpu_va_space_state_t state;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_va_space_t *ibm_va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
if (!ibm_gpu_va_space->npu_context)
return;
// va_space is guaranteed to not be NULL if ibm_gpu_va_space->npu_context is
// not NULL.
UVM_ASSERT(va_space);
state = uvm_gpu_va_space_state(gpu_va_space);
UVM_ASSERT(state == UVM_GPU_VA_SPACE_STATE_INIT || state == UVM_GPU_VA_SPACE_STATE_DEAD);
ibm_va_space = &va_space->ats.ibm;
// pnv_npu2_destroy_context() may in turn call mmu_notifier_unregister().
// If uvm_va_space_mm_shutdown() is concurrently executing in another
// thread, mmu_notifier_unregister() will wait for
// uvm_va_space_mm_shutdown() to finish. uvm_va_space_mm_shutdown() takes
// mmap_lock and the VA space lock, so we can't be holding those locks on
// this path.
uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
pnv_npu2_destroy_context(ibm_gpu_va_space->npu_context, gpu_va_space->gpu->parent->pci_dev);
ibm_gpu_va_space->npu_context = NULL;
}
#else
static void uvm_ats_ibm_register_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
uvm_gpu_t *gpu = gpu_va_space->gpu;
size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
uvm_ibm_va_space_t *ibm_va_space;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
uvm_assert_rwsem_locked_write(&va_space->lock);
uvm_write_lock_irqsave(&ibm_va_space->rwlock);
// If this is the first GPU VA space to use this NPU in the VA space, mark
// the NPU as active so invalidates are issued to it.
if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
// If this is the first active NPU in the entire VA space, we have to
// tell the kernel to send TLB invalidations to the IOMMU. See kernel
// commit 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f for background.
//
// This is safe to do without holding mm_users high or mmap_lock.
if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
mm_context_add_copro(va_space->va_space_mm.mm);
UVM_ASSERT(!test_bit(npu_index, ibm_va_space->npu_active_mask));
__set_bit(npu_index, ibm_va_space->npu_active_mask);
}
else {
UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
}
++ibm_va_space->npu_ref_counts[npu_index];
// As soon as this lock is dropped, invalidates on this VA space's mm may
// begin issuing ATSDs to this NPU.
uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);
ibm_gpu_va_space->did_ibm_driver_init = true;
}
static void uvm_ats_ibm_unregister_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
bool do_remove = false;
uvm_ibm_va_space_t *ibm_va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
if (!ibm_gpu_va_space->did_ibm_driver_init)
return;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
// Note that we aren't holding the VA space lock here, so another thread
// could be in uvm_ats_ibm_register_gpu_va_space() for this same GPU right
// now. The write lock and ref counts below will handle that case.
// Once we return from this function with a bit cleared in the
// npu_active_mask, we have to guarantee that this VA space no longer
// accesses that NPU's ATSD registers. This is needed in case GPU unregister
// needs to unmap those registers. We use the reader/writer lock to
// guarantee this, which means that invalidations must not access the ATSD
// registers outside of the lock.
//
// Future work: if we could synchronize_srcu() on the mmu_notifier SRCU we
// might do that here instead to flush out all invalidates. That would allow
// us to avoid taking a read lock in the invalidate path, though we'd have
// to be careful when clearing the mask bit relative to the synchronize, and
// we'd have to be careful in cases where this thread doesn't hold a
// reference to mm_users.
uvm_write_lock_irqsave(&ibm_va_space->rwlock);
UVM_ASSERT(ibm_va_space->npu_ref_counts[npu_index] > 0);
UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
--ibm_va_space->npu_ref_counts[npu_index];
if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
__clear_bit(npu_index, ibm_va_space->npu_active_mask);
if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
do_remove = true;
}
uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);
if (do_remove) {
// mm_context_remove_copro() must be called outside of the spinlock
// because it may issue invalidates across CPUs in this mm. The
// coprocessor count is atomically refcounted by that function, so it's
// safe to call here even if another thread jumps in with a register and
// calls mm_context_add_copro() between this thread's unlock and this
// call.
UVM_ASSERT(va_space->va_space_mm.mm);
mm_context_remove_copro(va_space->va_space_mm.mm);
}
}
#endif // UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static mm_context_id_t va_space_pasid(uvm_va_space_t *va_space)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
UVM_ASSERT(mm);
return mm->context.id;
}
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
NV_STATUS status = NV_OK;
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
UVM_ASSERT(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
status = uvm_ats_ibm_register_gpu_va_space_kernel(gpu_va_space);
#else
uvm_ats_ibm_register_gpu_va_space_driver(gpu_va_space);
#endif
gpu_va_space->ats.pasid = (NvU32) va_space_pasid(gpu_va_space->va_space);
return status;
}
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
uvm_ats_ibm_unregister_gpu_va_space_kernel(gpu_va_space);
#else
uvm_ats_ibm_unregister_gpu_va_space_driver(gpu_va_space);
#endif
gpu_va_space->ats.pasid = -1U;
}
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
// Find any available ATSD register set in this NPU and return that index. This
// will busy wait until a register set is free.
static NvU8 atsd_reg_acquire(uvm_ibm_npu_t *npu)
{
uvm_spin_loop_t spin;
size_t i;
bool first = true;
while (1) {
// Using for_each_clear_bit is racy, since the bits could change at any
// point. That's ok since we'll either just retry or use a real atomic
// to lock the bit. Checking for clear bits first avoids spamming
// atomics in the contended case.
for_each_clear_bit(i, npu->atsd_regs.locks, npu->atsd_regs.count) {
if (!test_and_set_bit_lock(i, npu->atsd_regs.locks))
return (NvU8)i;
}
// Back off and try again, avoiding the overhead of initializing the
// tracking timers unless we need them.
if (first) {
uvm_spin_loop_init(&spin);
first = false;
}
else {
UVM_SPIN_LOOP(&spin);
}
}
}
static void atsd_reg_release(uvm_ibm_npu_t *npu, NvU8 reg)
{
UVM_ASSERT(reg < npu->atsd_regs.count);
UVM_ASSERT(test_bit(reg, npu->atsd_regs.locks));
clear_bit_unlock(reg, npu->atsd_regs.locks);
}
static __be64 atsd_reg_read(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset)
{
__be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
UVM_ASSERT(reg < npu->atsd_regs.count);
return __raw_readq(io_addr);
}
static void atsd_reg_write(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset, NvU64 val)
{
__be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
UVM_ASSERT(reg < npu->atsd_regs.count);
__raw_writeq_be(val, io_addr);
}
// Acquire a set of registers in each NPU which is active in va_space
static void atsd_regs_acquire(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
regs->ids[i] = atsd_reg_acquire(&g_uvm_global.npus[i]);
}
static void atsd_regs_release(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
atsd_reg_release(&g_uvm_global.npus[i], regs->ids[i]);
}
// Write the provided value to each NPU active in va_space at the provided
// register offset.
static void atsd_regs_write(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, size_t offset, NvU64 val)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
atsd_reg_write(&g_uvm_global.npus[i], regs->ids[i], offset, val);
}
// Wait for all prior operations issued to active NPUs in va_space on the given
// registers to finish.
static void atsd_regs_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
uvm_spin_loop_t spin;
size_t i;
for_each_npu_index_in_va_space(i, va_space) {
UVM_SPIN_WHILE(atsd_reg_read(&g_uvm_global.npus[i], regs->ids[i], NPU_ATSD_REG_STAT), &spin)
;
}
}
// Encode an invalidate targeting the given pasid and the given size for the
// NPU_ATSD_REG_LAUNCH register. The target address is encoded separately.
//
// psize must be one of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A
// psize of MMU_PAGE_COUNT means to invalidate the entire address space.
static NvU64 atsd_get_launch_val(mm_context_id_t pasid, int psize)
{
NvU64 val = 0;
val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE);
val |= pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL);
if (psize == MMU_PAGE_COUNT) {
val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_INVAL_ALL);
}
else {
// The NPU registers do not support arbitrary sizes
UVM_ASSERT(psize == MMU_PAGE_64K || psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
val |= (NvU64)mmu_get_ap(psize) << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_INVAL_SIZE);
}
return val;
}
// Return the encoded size to use for an ATSD targeting the given range, in one
// of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A return value of
// MMU_PAGE_COUNT means the entire address space must be invalidated.
//
// start is an in/out parameter. On return start will be set to the aligned
// starting address to use for the ATSD. end is inclusive.
static int atsd_calc_size(NvU64 *start, NvU64 end)
{
// ATSDs have high latency, so we prefer to over-invalidate rather than
// issue multiple precise invalidates. Supported sizes are only 64K, 2M, and
// 1G.
*start = UVM_ALIGN_DOWN(*start, SZ_64K);
end = UVM_ALIGN_DOWN(end, SZ_64K);
if (*start == end)
return MMU_PAGE_64K;
*start = UVM_ALIGN_DOWN(*start, SZ_2M);
end = UVM_ALIGN_DOWN(end, SZ_2M);
if (*start == end)
return MMU_PAGE_2M;
*start = UVM_ALIGN_DOWN(*start, SZ_1G);
end = UVM_ALIGN_DOWN(end, SZ_1G);
if (*start == end)
return MMU_PAGE_1G;
return MMU_PAGE_COUNT;
}
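As a worked example of the size selection above: invalidating [0x210000, 0x21ffff] aligns both endpoints down to 0x210000 at 64K granularity, so a single 64K ATSD suffices. Invalidating [0x210000, 0x38ffff] gives different 64K-aligned endpoints (0x210000 vs 0x380000), but both align down to 0x200000 at 2M granularity, so start is widened to 0x200000 and one 2M invalidate covers the whole range. Any range that does not fit a single naturally aligned 64K, 2M, or 1G block falls back to MMU_PAGE_COUNT, i.e. a full address-space invalidate.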
// Issue an ATSD to all NPUs and wait for completion
static void atsd_launch_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 val)
{
atsd_regs_write(va_space, regs, NPU_ATSD_REG_LAUNCH, val);
atsd_regs_wait(va_space, regs);
}
// Issue and wait for the required membars following an invalidate
static void atsd_issue_membars(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
NvU32 num_membars = 0;
// These membars are issued using ATSDs which target a reserved PASID of 0.
// That PASID is valid on the GPU in order for the membar to be valid, but
// 0 will never be used by the kernel for an actual address space so the
// ATSD won't actually invalidate any entries.
NvU64 val = atsd_get_launch_val(0, MMU_PAGE_COUNT);
for_each_npu_index_in_va_space(i, va_space) {
uvm_ibm_npu_t *npu = &g_uvm_global.npus[i];
num_membars = max(num_membars, npu->atsd_regs.num_membars);
}
for (i = 0; i < num_membars; i++)
atsd_launch_wait(va_space, regs, val);
}
static void uvm_ats_ibm_invalidate_all(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), MMU_PAGE_COUNT);
atsd_launch_wait(va_space, regs, val);
atsd_issue_membars(va_space, regs);
}
static void uvm_ats_ibm_invalidate_range(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 start, int psize)
{
NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), psize);
// Barriers are expensive, so write all address registers first then do a
// single barrier for all of them.
atsd_regs_write(va_space, regs, NPU_ATSD_REG_AVA, start);
eieio();
atsd_launch_wait(va_space, regs, val);
atsd_issue_membars(va_space, regs);
}
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
unsigned long irq_flags;
uvm_atsd_regs_t regs;
NvU64 atsd_start = start;
int psize = atsd_calc_size(&atsd_start, end);
uvm_ibm_va_space_t *ibm_va_space = &va_space->ats.ibm;
BUILD_BUG_ON(order_base_2(UVM_MAX_ATSD_REGS) > 8*sizeof(regs.ids[0]));
// We must hold this lock in at least read mode when accessing NPU
// registers. See the comment in uvm_ats_ibm_unregister_gpu_va_space_driver.
uvm_read_lock_irqsave(&ibm_va_space->rwlock, irq_flags);
if (!bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS)) {
atsd_regs_acquire(va_space, &regs);
if (psize == MMU_PAGE_COUNT)
uvm_ats_ibm_invalidate_all(va_space, &regs);
else
uvm_ats_ibm_invalidate_range(va_space, &regs, atsd_start, psize);
atsd_regs_release(va_space, &regs);
}
uvm_read_unlock_irqrestore(&ibm_va_space->rwlock, irq_flags);
#else
UVM_ASSERT_MSG(0, "This function should not be called on this kernel version\n");
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
}
#endif // UVM_ATS_IBM_SUPPORTED
#endif // UVM_IBM_NPU_SUPPORTED

View File

@@ -1,266 +0,0 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_ATS_IBM_H__
#define __UVM_ATS_IBM_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#if defined(NVCPU_PPC64LE) && defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
#include <asm/mmu.h>
#if defined(NV_MAX_NPUS)
#define UVM_IBM_NPU_SUPPORTED() 1
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#if defined(NV_ASM_OPAL_API_H_PRESENT)
// For OPAL_NPU_INIT_CONTEXT
#include <asm/opal-api.h>
#endif
// Timeline of kernel changes:
//
// 0) Before 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c
// - No NPU-ATS code existed, nor did the OPAL_NPU_INIT_CONTEXT firmware
// call.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Not defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Not defined
// - ATS support type None
//
// 1) NPU ATS code added: 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c, v4.12
// (2017-04-03)
// - This commit added initial support for NPU ATS, including the necessary
// OPAL firmware calls. This support was developmental and required
// several bug fixes before it could be used in production.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type None
//
// 2) NPU ATS code fixed: a1409adac748f0db655e096521bbe6904aadeb98, v4.17
// (2018-04-11)
// - This commit changed the function signature for pnv_npu2_init_context's
// callback parameter. Since all required bug fixes went in prior to this
// change, we can use the callback signature as a flag to indicate
// whether the PPC arch layer in the kernel supports ATS in production.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type Kernel
//
// 3) NPU ATS code removed: 7eb3cf761927b2687164e182efa675e6c09cfe44, v5.3
// (2019-06-25)
// - This commit removed NPU-ATS support from the PPC arch layer, so the
// driver needs to handle things instead. pnv_npu2_init_context is no
// longer present, so we use OPAL_NPU_INIT_CONTEXT to differentiate
// between this state and scenario #0.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Not defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type Driver
//
#if defined(NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID)
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 1
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#elif !defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT) && defined(OPAL_NPU_INIT_CONTEXT) && UVM_CAN_USE_MMU_NOTIFIERS()
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 1
#else
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#endif
#define UVM_ATS_IBM_SUPPORTED() (UVM_ATS_IBM_SUPPORTED_IN_KERNEL() || UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
// Maximum number of parallel ATSD register sets per NPU
#define UVM_MAX_ATSD_REGS 16
typedef struct
{
#if UVM_IBM_NPU_SUPPORTED()
// These are the active NPUs in this VA space, that is, all NPUs with
// GPUs that have GPU VA spaces registered in this VA space.
//
// If a bit is clear in npu_active_mask then the corresponding entry of
// npu_ref_counts is 0. If a bit is set then the corresponding entry of
// npu_ref_counts is greater than 0.
NvU32 npu_ref_counts[NV_MAX_NPUS];
DECLARE_BITMAP(npu_active_mask, NV_MAX_NPUS);
#endif
// Lock protecting npu_ref_counts and npu_active_mask. Invalidations
// take this lock for read. GPU VA space register and unregister take
// this lock for write. Since all invalidations take the lock for read
// for the duration of the invalidate, taking the lock for write also
// flushes all invalidates.
//
// This is a spinlock because the invalidation code paths may be called
// with interrupts disabled, so those paths can't take the VA space
// lock. We could use a normal exclusive spinlock instead, but a reader/
// writer lock is preferred to allow concurrent invalidates in the same
// VA space.
uvm_rwlock_irqsave_t rwlock;
} uvm_ibm_va_space_t;
typedef struct
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
struct npu_context *npu_context;
#endif
// Used on the teardown path to know what to clean up. npu_context acts
// as the equivalent flag for kernel-provided support.
bool did_ibm_driver_init;
} uvm_ibm_gpu_va_space_t;
struct uvm_ibm_npu_struct
{
// Number of retained GPUs under this NPU. The other fields in this struct
// are only valid if this is non-zero.
unsigned int num_retained_gpus;
// PCI domain containing this NPU. This acts as a unique system-wide ID for
// this UVM NPU.
int pci_domain;
// The ATS-related fields are only valid when ATS support is enabled and
// UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1.
struct
{
// Mapped addresses of the ATSD trigger registers. There may be more
// than one set of identical registers per NPU to enable concurrent
// invalidates.
//
// These will not be accessed unless there is a GPU VA space registered
// on a GPU under this NPU. They are protected by bit locks in the locks
// field.
__be64 __iomem *io_addrs[UVM_MAX_ATSD_REGS];
// Actual number of registers in the io_addrs array
size_t count;
// Bitmask for allocation and locking of the registers. Bit index n
// corresponds to io_addrs[n]. A set bit means that index is in use
// (locked).
DECLARE_BITMAP(locks, UVM_MAX_ATSD_REGS);
// Max value of any uvm_parent_gpu_t::num_hshub_tlb_invalidate_membars
// for all retained GPUs under this NPU.
NvU32 num_membars;
} atsd_regs;
};
#if UVM_IBM_NPU_SUPPORTED()
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu);
#else
static NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
return NV_OK;
}
static void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
}
#endif // UVM_IBM_NPU_SUPPORTED
#if UVM_ATS_IBM_SUPPORTED()
// Initializes IBM specific GPU state.
//
// LOCKING: None
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space);
// Enables ATS access for the gpu_va_space on the mm_struct associated with
// the VA space (va_space_mm).
//
// If UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1, NV_ERR_NOT_SUPPORTED is
// returned if current->mm does not match va_space_mm.mm or if a GPU VA
// space within another VA space has already called this function on the
// same mm.
//
// If UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1 there are no such restrictions.
//
// LOCKING: The VA space lock must be held in write mode.
// current->mm->mmap_lock must be held in write mode iff
// UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Disables ATS access for the gpu_va_space. Prior to calling this function,
// the caller must guarantee that the GPU will no longer make any ATS
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and the VA space lock, so
// neither must be held.
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
// interrupts disabled.
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#else
static void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
}
static void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
}
#endif // UVM_ATS_IBM_SUPPORTED
static NV_STATUS uvm_ats_ibm_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_ibm_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
}
#endif // __UVM_ATS_IBM_H__

View File

@@ -139,7 +139,11 @@ static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT___iowrite64_lo_hi
__iowrite64_lo_hi(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#else
iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#endif
}
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU

View File

@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Blackwell covers 64 PB and that's the minimum
@@ -83,10 +81,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -106,4 +100,15 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->no_ats_range_required = true;
parent_gpu->conf_computing.per_channel_key_rotation = true;
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
parent_gpu->rm_info.gpuImplementation ==
NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B)
parent_gpu->is_integrated_gpu = true;
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
parent_gpu->rm_info.gpuImplementation ==
NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
parent_gpu->is_integrated_gpu = true;
}

View File

@@ -254,3 +254,31 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
}
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries)
{
if (parent_gpu->rm_info.accessCntrBufferCount > 1) {
NvU32 i;
for (i = 0; i < num_entries; i++) {
const uvm_access_counter_buffer_entry_t *entry = buffer_entries[i];
// The LSb identifies the die ID.
if ((entry->tag & 0x1) == 1)
return UVM_ACCESS_COUNTER_CLEAR_OP_ALL;
}
}
return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries)
{
return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}
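The GB100 variant above boils down to a small predicate: with more than one notification buffer, any entry whose tag has the least-significant bit set (a nonzero die ID) forces a clear of all counters; otherwise a targeted clear suffices. A stand-alone sketch of that decision, using hypothetical user-space types rather than the driver's structures:

#include <stdbool.h>
#include <stdint.h>

static bool example_needs_clear_all(const uint32_t *tags, unsigned count, bool multiple_buffers)
{
    unsigned i;

    if (!multiple_buffers)
        return false;          /* single buffer: targeted clears are always possible */

    for (i = 0; i < count; i++) {
        if (tags[i] & 0x1)     /* LSb set: the notification carries a nonzero die ID */
            return true;
    }

    return false;
}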


@@ -39,6 +39,7 @@
#include "hwref/blackwell/gb100/dev_fault.h"
static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
static uvm_mmu_mode_hal_t blackwell_integrated_mmu_mode_hal;
static NvU32 page_table_depth_blackwell(NvU64 page_size)
{
@@ -59,35 +60,71 @@ static NvU64 page_sizes_blackwell(void)
return UVM_PAGE_SIZE_256G | UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static NvU64 page_sizes_blackwell_integrated(void)
{
return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static uvm_mmu_mode_hal_t *__uvm_hal_mmu_mode_blackwell(uvm_mmu_mode_hal_t *mmu_mode_hal,
NvU64 big_page_size)
{
uvm_mmu_mode_hal_t *hopper_mmu_mode_hal;
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
UVM_ASSERT(hopper_mmu_mode_hal);
// The assumption made is that arch_hal->mmu_mode_hal() will be called
// under the global lock the first time, so check it here.
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
*mmu_mode_hal = *hopper_mmu_mode_hal;
mmu_mode_hal->page_table_depth = page_table_depth_blackwell;
return mmu_mode_hal;
}
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size)
{
static bool initialized = false;
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
// TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
// 128K big page size for Pascal+ GPUs
if (big_page_size == UVM_PAGE_SIZE_128K)
return NULL;
if (!initialized) {
uvm_mmu_mode_hal_t *hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
UVM_ASSERT(hopper_mmu_mode_hal);
// The assumption made is that arch_hal->mmu_mode_hal() will be called
// under the global lock the first time, so check it here.
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
blackwell_mmu_mode_hal = *hopper_mmu_mode_hal;
blackwell_mmu_mode_hal.page_table_depth = page_table_depth_blackwell;
blackwell_mmu_mode_hal.page_sizes = page_sizes_blackwell;
uvm_mmu_mode_hal_t *mmu_mode_hal;
mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_mmu_mode_hal, big_page_size);
mmu_mode_hal->page_sizes = page_sizes_blackwell;
initialized = true;
}
return &blackwell_mmu_mode_hal;
}
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size)
{
static bool initialized = false;
// TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
// 128K big page size for Pascal+ GPUs
if (big_page_size == UVM_PAGE_SIZE_128K)
return NULL;
if (!initialized) {
uvm_mmu_mode_hal_t *mmu_mode_hal;
mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_integrated_mmu_mode_hal, big_page_size);
mmu_mode_hal->page_sizes = page_sizes_blackwell_integrated;
initialized = true;
}
return &blackwell_integrated_mmu_mode_hal;
}
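Both constructors above follow the same derivation pattern: under the global lock, copy the Hopper ops table once into a static structure, then override only the entries that differ on Blackwell. A minimal sketch of that pattern with hypothetical types (not the driver's real HAL structures):

struct example_ops {
    NvU32 (*page_table_depth)(NvU64 page_size);
    NvU64 (*page_sizes)(void);
};

static NvU64 example_integrated_page_sizes(void)
{
    return (2ULL << 20) | (64ULL << 10) | (4ULL << 10);   /* 2M | 64K | 4K */
}

static struct example_ops example_derived_ops;
static bool example_derived_ops_ready;

static const struct example_ops *example_get_derived_ops(const struct example_ops *base)
{
    if (!example_derived_ops_ready) {
        example_derived_ops = *base;                                    /* inherit every hook */
        example_derived_ops.page_sizes = example_integrated_page_sizes; /* override what differs */
        example_derived_ops_ready = true;
    }

    return &example_derived_ops;
}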
NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id)
{
switch (client_id) {


@@ -65,7 +65,10 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
memset(host_ptr, 0, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
uvm_rm_mem_type_t type;
type = gpu->mem_info.size ? UVM_RM_MEM_TYPE_GPU : UVM_RM_MEM_TYPE_SYS;
status = uvm_rm_mem_alloc(gpu, type, CE_TEST_MEM_SIZE, 0, &mem[i]);
TEST_CHECK_GOTO(status == NV_OK, done);
}
@@ -405,6 +408,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
uvm_rm_mem_t *sys_rm_mem = NULL;
uvm_rm_mem_t *gpu_rm_mem = NULL;
uvm_gpu_address_t gpu_addresses[4] = {0};
size_t gpu_addresses_length = 0;
size_t size = gpu->big_page.internal_size;
static const size_t element_sizes[] = {1, 4, 8};
const size_t iterations = 4;
@@ -435,7 +439,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
if (g_uvm_global.conf_computing_enabled) {
for (i = 0; i < iterations; ++i) {
@@ -472,21 +476,23 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
if (gpu->mem_info.size > 0) {
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
}
for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
for (j = 0; j < gpu_addresses_length; ++j) {
for (k = 0; k < gpu_addresses_length; ++k) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[k],


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
uvm_channel_manager_t *manager = pool->manager;
uvm_gpu_t *gpu = manager->gpu;
uvm_gpu_id_t id;
if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
return true;
for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return false;
}
@@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
{
uvm_channel_pool_t *pool;
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
@@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
// Use bitmap to track which were suspended.
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
status = channel_pool_suspend_p2p(pool);
@@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
{
uvm_channel_pool_t *pool;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
@@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
// Use bitmap to track which were suspended.
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
channel_pool_resume_p2p(pool);
@@ -2889,6 +2901,13 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
static bool ce_is_usable(const UvmGpuCopyEngineCaps *cap)
{
// When Confidential Computing is enabled, all Copy Engines must support
// encryption / decryption, tracked by the 'secure' flag. This holds even for
// non-CPU-GPU transactions because each channel has an associated semaphore,
// and semaphore release must be observable by all processing units.
if (g_uvm_global.conf_computing_enabled && !cap->secure)
return false;
return cap->supported && !cap->grce;
}
@@ -3243,9 +3262,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
}
// 2- Allocation locations
@@ -3285,9 +3304,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
if (!is_string_valid_location(pushbuffer_loc_value)) {
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
}
// Override the default value if requested by the user
@@ -3297,8 +3316,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// so force the location to sys for now.
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
if (NVCPU_IS_AARCH64) {
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
}
else {
@@ -3310,8 +3329,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// Only support the knobs for GPFIFO/GPPut on Volta+
if (!gpu->parent->gpfifo_in_vidmem_supported) {
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
"instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
}
manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
@@ -3323,17 +3343,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
gpfifo_loc_value = uvm_channel_gpfifo_loc;
if (!is_string_valid_location(gpfifo_loc_value)) {
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
}
gpput_loc_value = uvm_channel_gpput_loc;
if (!is_string_valid_location(gpput_loc_value)) {
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
}
// On coherent platforms where the GPU does not cache sysmem but the CPU


@@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \
@@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
current->pid, \
##__VA_ARGS__)
// Short prefix - typically for information.
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
func(prefix fmt, ##__VA_ARGS__)
// No prefix - used by kernel panic messages.
#define UVM_PRINT_FUNC(func, fmt, ...) \
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
// Check whether UVM_{ERR,DBG,INFO)_PRINT* should be enabled
// Check whether UVM_{ERR,DBG}_PRINT* should be enabled.
bool uvm_debug_prints_enabled(void);
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
} \
} while (0)
#define UVM_ASSERT_PRINT(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT_RL(fmt, ...) \
#define UVM_DBG_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
// UVM_INFO_PRINT prints in all modes (including release mode). It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
// Prevent function calls in expr and the print argument list from being
@@ -151,7 +160,8 @@ void on_uvm_assert(void);
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
@@ -240,15 +250,6 @@ static inline NvBool uvm_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_star
return a_end >= b_start && b_end >= a_start;
}
static int debug_mode(void)
{
#ifdef DEBUG
return 1;
#else
return 0;
#endif
}
static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
{
if (ppCache)
@@ -326,22 +327,6 @@ typedef struct
NvHandle user_object;
} uvm_rm_user_object_t;
typedef enum
{
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING,
UVM_FD_VA_SPACE,
UVM_FD_MM,
UVM_FD_COUNT
} uvm_fd_type_t;
// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails due when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 2
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
// Macro used to compare two values for types that support the less-than
// operator. It returns -1 if a < b, 1 if a > b, and 0 if a == b.
#define UVM_CMP_DEFAULT(a,b) \
@@ -364,37 +349,13 @@ typedef enum
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);
// Returns the type of data filp->private_data contains to and if ptr_val !=
// NULL returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);
// Returns the pointer stored in filp->private_data if the type
// matches, otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);
// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
char *mapping;
UVM_ASSERT(page);
mapping = (char *) kmap(page);
(void)READ_ONCE(*mapping);
kunmap(page);
}
// Like uvm_file_is_nvidia_uvm(), but further requires that the input file
// represent a UVM VA space (has fd type UVM_FD_VA_SPACE).
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp);
// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);
static bool uvm_platform_uses_canonical_form_address(void)
{
if (NVCPU_IS_PPC64LE)
return false;
return true;
}
// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
static NvU32 uvm_cpu_num_va_bits(void)
{
@@ -410,7 +371,7 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *
// Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
// even when plugged into platforms using it.
if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
if (num_va_bits > 40) {
*first = 1ULL << (num_va_bits - 1);
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
}
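The two assignments above carve the non-canonical hole out of the middle of the CPU's address space. As a worked instance of the same arithmetic for the common 48-bit VA width (a stand-alone user-space sketch, not part of the driver):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    unsigned num_va_bits = 48;
    uint64_t first = 1ULL << (num_va_bits - 1);
    uint64_t outer = (uint64_t)((int64_t)(1ULL << 63) >> (64 - num_va_bits));

    /* Prints first = 0x0000800000000000 and outer = 0xffff800000000000;
     * every address in [first, outer) is unaddressable in canonical form. */
    printf("first = 0x%016" PRIx64 "\n", first);
    printf("outer = 0x%016" PRIx64 "\n", outer);
    return 0;
}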


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// There is no dedicated lock for the CSL context associated with replayable
// faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// See comment in uvm_conf_computing_fault_decrypt
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -708,7 +708,12 @@ void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
return gpu->channel_manager->conf_computing.key_rotation_enabled;
UVM_ASSERT(gpu);
// If the channel_manager is not set, we're in the channel manager destroy
// path after the pointer was NULL-ed. Chances are that other key rotation
// infrastructure is not available either, so disallow key rotation.
return gpu->channel_manager && gpu->channel_manager->conf_computing.key_rotation_enabled;
}
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)


@@ -0,0 +1,114 @@
/*******************************************************************************
Copyright (c) 2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_fd_type.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_va_space.h"
#include "uvm_test_file.h"
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
{
unsigned long uptr;
uvm_fd_type_t type;
void *ptr;
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
switch (type) {
case UVM_FD_UNINITIALIZED:
case UVM_FD_INITIALIZING:
UVM_ASSERT(!ptr);
break;
case UVM_FD_VA_SPACE:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
break;
case UVM_FD_MM:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
break;
case UVM_FD_TEST:
UVM_ASSERT(ptr);
BUILD_BUG_ON(__alignof__(uvm_test_file_t) < (1UL << UVM_FD_TYPE_BITS));
break;
default:
UVM_ASSERT(0);
}
if (ptr_val)
*ptr_val = ptr;
return type;
}
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
void *ptr;
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
if (uvm_fd_type(filp, &ptr) == type)
return ptr;
else
return NULL;
}
uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp)
{
long old = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
return (uvm_fd_type_t)(old & UVM_FD_TYPE_MASK);
}
NV_STATUS uvm_fd_type_init(struct file *filp)
{
uvm_fd_type_t old = uvm_fd_type_init_cas(filp);
if (old != UVM_FD_UNINITIALIZED)
return NV_ERR_IN_USE;
return NV_OK;
}
void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr)
{
void *tmp_ptr;
UVM_ASSERT(uvm_fd_type(filp, &tmp_ptr) == UVM_FD_INITIALIZING);
UVM_ASSERT(!tmp_ptr);
if (type == UVM_FD_UNINITIALIZED)
UVM_ASSERT(!ptr);
UVM_ASSERT(((uintptr_t)ptr & type) == 0);
atomic_long_set_release((atomic_long_t *)&filp->private_data, (uintptr_t)ptr | type);
}
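uvm_fd_type() works because the objects stored in filp->private_data are aligned strongly enough (enforced by the BUILD_BUG_ON checks above) that their low bits are always zero and can hold a small type tag. A minimal user-space illustration of that packing scheme, with hypothetical names and assuming 8-byte-aligned objects:

#include <assert.h>
#include <stdint.h>

enum example_tag { EXAMPLE_NONE = 0, EXAMPLE_A = 1, EXAMPLE_B = 2 };

#define EXAMPLE_TAG_BITS 3
#define EXAMPLE_TAG_MASK ((1UL << EXAMPLE_TAG_BITS) - 1)

static uintptr_t example_pack(void *ptr, enum example_tag tag)
{
    /* Alignment guarantees the low EXAMPLE_TAG_BITS bits of ptr are zero. */
    assert(((uintptr_t)ptr & EXAMPLE_TAG_MASK) == 0);
    return (uintptr_t)ptr | (uintptr_t)tag;
}

static void *example_unpack(uintptr_t word, enum example_tag *tag)
{
    *tag = (enum example_tag)(word & EXAMPLE_TAG_MASK);
    return (void *)(word & ~EXAMPLE_TAG_MASK);
}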


@@ -0,0 +1,69 @@
/*******************************************************************************
Copyright (c) 2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_FD_TYPE_H__
#define __UVM_FD_TYPE_H__
#include "nvstatus.h"
typedef enum
{
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING,
UVM_FD_VA_SPACE,
UVM_FD_MM,
UVM_FD_TEST,
UVM_FD_COUNT
} uvm_fd_type_t;
// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 3
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
struct file;
// Returns the type of data filp->private_data contains and if ptr_val != NULL
// returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);
// Returns the pointer stored in filp->private_data if the type matches,
// otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);
// Does atomic CAS on filp->private_data, expecting UVM_FD_UNINITIALIZED and
// swapping in UVM_FD_INITIALIZING. Returns the old type regardless of CAS
// success.
uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp);
// Like uvm_fd_type_init_cas() but returns NV_OK on CAS success and
// NV_ERR_IN_USE on CAS failure.
NV_STATUS uvm_fd_type_init(struct file *filp);
// Assigns {type, ptr} to filp. filp's current type must be UVM_FD_INITIALIZING.
// If the new type is UVM_FD_UNINITIALIZED, ptr must be NULL.
void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr);
#endif // __UVM_FD_TYPE_H__
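Taken together, the contracts above imply a three-step open path: claim the fd with the CAS, build the object, then publish it (or roll back on failure). A hypothetical caller sketch based only on those contracts; create_example_va_space() is made up for illustration:

static NV_STATUS example_fd_open(struct file *filp)
{
    uvm_va_space_t *va_space;
    NV_STATUS status = uvm_fd_type_init(filp);    /* UVM_FD_UNINITIALIZED -> UVM_FD_INITIALIZING */

    if (status != NV_OK)
        return status;                            /* another thread already claimed this fd */

    va_space = create_example_va_space();         /* hypothetical helper */
    if (!va_space) {
        uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);  /* roll the claim back */
        return NV_ERR_NO_MEMORY;
    }

    uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);       /* publish pointer and type */
    return NV_OK;
}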


@@ -93,13 +93,12 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
typedef struct uvm_ibm_npu_struct uvm_ibm_npu_t;
#endif //__UVM_FORWARD_DECL_H__


@@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
goto error;
}
status = uvm_access_counters_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
goto error;
}
// This sets up the ISR (interrupt service routine), by hooking into RM's
// top-half ISR callback. As soon as this call completes, GPU interrupts
// will start arriving, so it's important to be prepared to receive
@@ -224,8 +230,8 @@ void uvm_global_exit(void)
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_access_counters_exit();
uvm_service_block_context_exit();
uvm_perf_heuristics_exit();
uvm_perf_events_exit();
uvm_migrate_exit();
@@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
// * Flush relevant kthread queues (bottom half, etc.)
// Some locks acquired by this function, such as pm.lock, are released
// by uvm_resume(). This is contrary to the lock tracking code's
// by uvm_resume(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
@@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
// ensure any outstanding replayable faults are dismissed. The RM
// ensure any outstanding replayable faults are dismissed. The RM
// guarantees that all user channels have been preempted before
// uvm_suspend() is called, which implies that no user channels can be
// stalled on faults when this point is reached.
@@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
}
// Acquire each VA space's lock in write mode to lock out VMA open and
// release callbacks. These entry points do not have feasible early exit
// release callbacks. These entry points do not have feasible early exit
// options, and so aren't suitable for synchronization with pm.lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
g_uvm_global.pm.is_suspended = false;
// Some locks released by this function, such as pm.lock, were acquired
// by uvm_suspend(). This is contrary to the lock tracking code's
// by uvm_suspend(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
@@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
uvm_thread_context_lock_enable_tracking();
// Force completion of any release callbacks successfully queued for
// deferred completion while suspended. The deferred release
// deferred completion while suspended. The deferred release
// queue is not guaranteed to remain empty following this flush since
// some threads that failed to acquire pm.lock in uvm_release() may
// not have scheduled their handlers yet.
@@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
}
else {
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
nvstatusToString(error),
nvstatusToString(previous_error));
}
nvUvmInterfaceReportFatalError(error);


@@ -31,7 +31,6 @@
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"
// Global state of the uvm driver
struct uvm_global_struct
@@ -124,12 +123,6 @@ struct uvm_global_struct
bool enabled;
} ats;
#if UVM_IBM_NPU_SUPPORTED()
// On IBM systems this array tracks the active NPUs (the NPUs which are
// attached to retained GPUs).
uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif
// List of all active VA spaces
struct
{


@@ -149,7 +149,7 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
}
// Return a PASID to use with the internal address space (AS), or -1 if not
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// not used in address translation requests, which only translate GPA->SPA.
// The buffer management thus remains the same: DMA mapped GPA addresses can
// be accessed by the GPU, while unmapped addresses can not and any access is
@@ -358,10 +358,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
addr_shift = gpu_addr_shift;
// Pascal+ GPUs are capable of accessing kernel pointers in various modes
// by applying the same upper-bit checks that x86, ARM, and Power
// processors do. x86 and ARM use canonical form addresses. For ARM, even
// by applying the same upper-bit checks that x86 or ARM CPUs do.
// The x86 and ARM platforms use canonical form addresses. For ARM, even
// with Top-Byte Ignore enabled, the following logic validates addresses
// from the kernel VA range. PowerPC does not use canonical form address.
// from the kernel VA range.
//
// The following diagram illustrates the valid (V) VA regions that can be
// mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
// (C) regions are only accessible by the CPU. Similarly, (G) regions
@@ -389,8 +390,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
// 0 +----------------+ 0 +----------------+
// On canonical form address platforms and Pascal+ GPUs.
if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
// On Pascal+ GPUs.
if (gpu_addr_shift > 40) {
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
@@ -431,30 +432,28 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
NvU8 addr_shift;
NvU64 input_addr = addr;
if (uvm_platform_uses_canonical_form_address()) {
// When the CPU VA width is larger than GPU's, it means that:
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
// behavior of CPUs with smaller (than GPU) VA widths.
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
cpu_addr_shift = uvm_cpu_num_va_bits();
// When the CPU VA width is larger than GPU's, it means that:
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
// behavior of CPUs with smaller (than GPU) VA widths.
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
cpu_addr_shift = uvm_cpu_num_va_bits();
if (cpu_addr_shift > gpu_addr_shift)
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
else if (gpu_addr_shift == 57)
addr_shift = gpu_addr_shift;
else
addr_shift = cpu_addr_shift;
if (cpu_addr_shift > gpu_addr_shift)
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
else if (gpu_addr_shift == 57)
addr_shift = gpu_addr_shift;
else
addr_shift = cpu_addr_shift;
addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
// This protection acts on when the address is not covered by the GPU's
// OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
// permissive (NO_CHECK) mode.
if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
return input_addr;
}
// This protection acts on when the address is not covered by the GPU's
// OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
// permissive (NO_CHECK) mode.
if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
return input_addr;
return addr;
}
@@ -485,7 +484,7 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
continue;
UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
"nvlink p2p %u p2p %u\n",
"nvlink p2p %u p2p %u secure %u\n",
i,
ce_caps->cePceMask,
ce_caps->grce,
@@ -494,7 +493,8 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
ce_caps->sysmemWrite,
ce_caps->sysmem,
ce_caps->nvlinkP2p,
ce_caps->p2p);
ce_caps->p2p,
ce_caps->secure);
}
out:
@@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU64 mapped_cpu_pages_size;
NvU32 get, put;
NvU32 get;
NvU32 put;
NvU32 i;
unsigned int cpu;
UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
@@ -593,9 +595,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
window_size / (1024 * 1024));
}
if (gpu->parent->npu)
UVM_SEQ_OR_DBG_PRINT(s, "npu_domain %d\n", gpu->parent->npu->pci_domain);
UVM_SEQ_OR_DBG_PRINT(s, "interrupts %llu\n", gpu->parent->isr.interrupt_count);
if (gpu->parent->isr.replayable_faults.handling) {
@@ -608,19 +607,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.replayable.max_faults);
gpu->parent->fault_buffer.replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
gpu->parent->fault_buffer_info.replayable.cached_get);
gpu->parent->fault_buffer.replayable.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
gpu->parent->fault_buffer_info.replayable.cached_put);
gpu->parent->fault_buffer.replayable.cached_put);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
gpu->parent->fault_buffer_hal->read_get(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
gpu->parent->fault_buffer_hal->read_put(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
gpu->parent->fault_buffer_info.max_batch_size);
gpu->parent->fault_buffer.max_batch_size);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_replayable_faults);
}
@@ -634,32 +633,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.non_replayable.max_faults);
gpu->parent->fault_buffer.non_replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_non_replayable_faults);
}
if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
gpu->parent->isr.access_counters.stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
gpu->parent->isr.access_counters[i].stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer[i].max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer[i].cached_get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer[i].cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
}
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer_info.max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer_info.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer_info.cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
}
num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
@@ -694,18 +696,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
parent_gpu->fault_buffer.replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
parent_gpu->fault_buffer.replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -713,25 +715,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays);
parent_gpu->fault_buffer.replayable.stats.num_replays);
UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
parent_gpu->stats.num_non_replayable_faults -
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -743,16 +745,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
{
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU32 i;
UVM_ASSERT(uvm_procfs_is_debug_enabled());
num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
// procfs_files are created before gpu_init_isr, so we need to check whether
// the access_counter_buffer is allocated.
if (parent_gpu->access_counter_buffer) {
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
}
}
}
// This function converts an index of 2D array of size [N x N] into an index
@@ -892,7 +903,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
return -EAGAIN;
gpu_info_print_common(gpu, s);
@@ -911,7 +922,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
return -EAGAIN;
gpu_fault_stats_print_common(parent_gpu, s);
@@ -930,7 +941,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
return -EAGAIN;
gpu_access_counters_print_common(parent_gpu, s);
@@ -1027,7 +1038,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
{
struct proc_dir_entry *gpu_base_dir_entry;
char symlink_name[16]; // Hold a uvm_gpu_id_t value in decimal.
char uuid_buffer[max(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
char uuid_buffer[NV_MAX(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
char gpu_dir_name[sizeof(symlink_name) + sizeof(uuid_buffer) + 1];
if (!uvm_procfs_is_enabled())
@@ -1182,7 +1193,9 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters_clear_tracker);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1200,6 +1213,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
return NV_OK;
cleanup:
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_kvfree(parent_gpu);
return status;
@@ -1221,7 +1235,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)
// Initialize enough of the gpu struct for remove_gpu to be called
gpu->magic = UVM_GPU_MAGIC_VALUE;
uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);
sub_processor_index = uvm_id_sub_processor_index(gpu_id);
parent_gpu->gpus[sub_processor_index] = gpu;
@@ -1235,13 +1249,15 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
NvU32 num_entries;
NvU64 va_size;
NvU64 va_per_entry;
NvU64 physical_address;
NvU64 dma_address;
uvm_mmu_page_table_alloc_t *tree_alloc;
status = uvm_page_tree_init(gpu,
NULL,
UVM_PAGE_TREE_TYPE_KERNEL,
gpu->big_page.internal_size,
uvm_get_page_tree_location(gpu->parent),
uvm_get_page_tree_location(gpu),
&gpu->address_space_tree);
if (status != NV_OK) {
UVM_ERR_PRINT("Initializing the page tree failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1265,12 +1281,17 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
gpu->parent->rm_va_size,
va_per_entry);
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
tree_alloc = uvm_page_tree_pdb_internal(&gpu->address_space_tree);
if (tree_alloc->addr.aperture == UVM_APERTURE_VID)
physical_address = tree_alloc->addr.address;
else
physical_address = page_to_phys(tree_alloc->handle.page);
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
tree_alloc->addr.address,
physical_address,
num_entries,
tree_alloc->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
gpu_get_internal_pasid(gpu),
&dma_address));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
nvstatusToString(status),
@@ -1278,6 +1299,9 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
return status;
}
if (tree_alloc->addr.aperture == UVM_APERTURE_SYS)
gpu->address_space_tree.pdb_rm_dma_address = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_address);
gpu->rm_address_space_moved_to_page_tree = true;
return NV_OK;
@@ -1390,13 +1414,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
parent_gpu->egm.enabled = gpu_info->egmEnabled;
parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
parent_gpu->egm.base_address = gpu_info->egmBaseAddr;
parent_gpu->access_counters_supported = (gpu_info->accessCntrBufferCount != 0);
status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(parent_gpu->rm_device, &fb_info));
if (status != NV_OK)
return status;
parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);
if (!fb_info.bZeroFb)
parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;
@@ -1616,7 +1639,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// trackers.
if (sync_replay_tracker) {
uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@@ -1627,7 +1650,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// VA block trackers, too.
if (sync_clear_faulted_tracker) {
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@@ -1635,10 +1658,10 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
@@ -1680,15 +1703,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));
// Access counters should have been disabled when the GPU is no longer
// registered in any VA space.
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
deinit_parent_procfs_files(parent_gpu);
// Return ownership to RM
uvm_parent_gpu_deinit_isr(parent_gpu);
deinit_parent_procfs_files(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
@@ -1778,6 +1797,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_kvfree(parent_gpu);
}
@@ -1794,14 +1815,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
break;
default:
UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
@@ -1809,7 +1830,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
}
if (!fault_entry->is_virtual)
++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;
++parent_gpu->stats.num_non_replayable_faults;
@@ -1821,23 +1842,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_PREFETCH:
++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
break;
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
break;
default:
break;
}
if (is_duplicate || fault_entry->filtered)
++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;
++parent_gpu->stats.num_replayable_faults;
}
@@ -1901,21 +1922,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
if (gpu_dst) {
atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
if (is_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
else if (is_access_counter)
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
if (is_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
else if (is_access_counter)
atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
}
}
}
@@ -1929,8 +1958,9 @@ static void uvm_param_conf(void)
}
else {
if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy,
UVM_PARAM_PEER_COPY_PHYSICAL);
}
g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
@@ -2397,6 +2427,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
{
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(peer_caps->ref_count == 0);
status = parent_peers_retain(gpu0->parent, gpu1->parent);
@@ -2419,25 +2450,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
// In the case of NVLINK peers, this initialization will happen during
// add_gpu. As soon as the peer info table is assigned below, the access
// counter bottom half could start operating on the GPU being newly
// added and inspecting the peer caps, so all of the appropriate
// initialization must happen before this point.
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
return NV_OK;
@@ -2465,18 +2484,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
// Flush the access counter buffer to avoid getting stale notifications for
// accesses to GPUs to which peer access is being disabled. This is also
@@ -2690,7 +2709,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);
// If the parent is being freed, stop scheduling new bottom halves and
// update relevant software state. Else flush any pending bottom halves
// update relevant software state. Else flush any pending bottom halves
// before continuing.
if (free_parent)
uvm_parent_gpu_disable_isr(parent_gpu);
@@ -2713,6 +2732,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
const UvmGpuPlatformInfo *gpu_platform_info,
uvm_parent_gpu_t *parent_gpu,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
@@ -2725,6 +2745,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
if (status != NV_OK)
return status;
if (uvm_enable_builtin_tests)
parent_gpu->test = *parent_gpu_error;
}
gpu = alloc_gpu(parent_gpu, gpu_id);
@@ -2794,7 +2817,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
// Clear the interrupt bit and force the re-evaluation of the interrupt
// condition to ensure that we don't miss any pending interrupt
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// Access counters are enabled on demand
@@ -2837,6 +2860,7 @@ error:
// the partition.
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status = NV_OK;
@@ -2869,6 +2893,10 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
if (gpu_info->accessCntrBufferCount > 1)
gpu_info->accessCntrBufferCount = 1;
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
@@ -2888,7 +2916,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
if (status != NV_OK)
goto error_unregister;
}
@@ -2913,11 +2941,12 @@ error_free_gpu_info:
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
uvm_mutex_lock(&g_uvm_global.global_lock);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
@@ -3072,118 +3101,81 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return (address.address >= gpu->parent->peer_va_base &&
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
}
} else {
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (uvm_aperture_is_peer(address.aperture)) {
bool is_peer = true;
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;
// EGM uses peer IDs but they are different from VIDMEM peer IDs.
// Check if the address aperture is an EGM aperture.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
uvm_aperture_t egm_peer_aperture;
if (!parent_peer_gpu->egm.enabled)
continue;
egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);
if (address.aperture == egm_peer_aperture) {
is_peer = false;
break;
}
// EGM uses peer IDs but they are different from VIDMEM peer
// IDs.
// Check if the address aperture is an EGM aperture.
// We should not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
if (address.aperture != UVM_APERTURE_SYS)
return false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
}
return false;
}
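The SYS-aperture branch above boils down to a containment test: the DMA address is translated back to a CPU physical address and treated as peer memory when it lands inside another parent GPU's exposed coherent memory window. A minimal standalone sketch of that check, using simplified stand-in types (the window fields and the flat GPU array are illustrative, not the UVM structures):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Simplified stand-in for a parent GPU's exposed coherent memory window.
struct gpu_memory_window {
    uint64_t start; // first CPU physical address backed by this GPU
    uint64_t end;   // last CPU physical address backed by this GPU
};

// Returns true when phys_addr lands inside any *other* GPU's window, i.e. a
// SYS-aperture access that actually reaches peer memory.
static bool phys_addr_is_peer(const struct gpu_memory_window *windows,
                              size_t count,
                              size_t local_index,
                              uint64_t phys_addr)
{
    size_t i;

    for (i = 0; i < count; i++) {
        if (i == local_index)
            continue;
        if (phys_addr >= windows[i].start && phys_addr <= windows[i].end)
            return true;
    }

    return false;
}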
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu)
{
// See comment in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) || g_uvm_global.conf_computing_enabled)
// See comments in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent) || g_uvm_global.conf_computing_enabled)
return UVM_APERTURE_VID;
if (!gpu->mem_info.size)
return UVM_APERTURE_SYS;
return UVM_APERTURE_DEFAULT;
}
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
{
uvm_processor_id_t id = UVM_ID_INVALID;
// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
// is also reported for accesses to remote GPUs connected to a different CPU
// NUMA domain. We will need to determine the actual processor id using the
// reported physical address.
if (addr.aperture == UVM_APERTURE_SYS)
return UVM_ID_CPU;
else if (addr.aperture == UVM_APERTURE_VID)
return gpu->id;
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
// NVSWITCH connected systems use an extended physical address to
// map to peers. Find the physical memory 'slot' containing the
// given physical address to find the peer gpu that owns the
// physical address
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
other_gpu->mem_info.max_allocatable_address;
if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
fabric_window_end >= addr.address)
break;
}
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
return id;
}
static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
{
NvU64 key;
@@ -3570,20 +3562,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
if (!user_channel) {
status = NV_ERR_INVALID_CHANNEL;
goto exit_unlock;
}
if (!user_channel->in_subctx) {
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
UVM_ASSERT_MSG(entry->ve_id == 0,
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
entry->ve_id);
gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
@@ -3591,7 +3582,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_gpu = gpu_va_space->gpu;
}
else {
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
@@ -3638,7 +3629,7 @@ static NvU64 gpu_addr_to_dma_addr(uvm_parent_gpu_t *parent_gpu, NvU64 gpu_addr)
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the DMA address we get from the OS.
static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
{
NvU64 gpu_addr = dma_addr - parent_gpu->dma_addressable_start;
UVM_ASSERT(dma_addr >= gpu_addr);
@@ -3646,32 +3637,40 @@ static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
return gpu_addr;
}
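A concrete, made-up example of the conversion above: if RM configures dma_addressable_start at 0x200000000 and the OS returns a DMA address of 0x240001000, the GPU is programmed with the difference, 0x40001000. A tiny self-contained sketch (the numeric values are hypothetical):

#include <assert.h>
#include <stdint.h>

// Mirrors the subtraction in uvm_parent_gpu_dma_addr_to_gpu_addr(): the
// GPU-visible address is the OS DMA address minus the RM-configured base.
static uint64_t dma_to_gpu_addr(uint64_t dma_addressable_start, uint64_t dma_addr)
{
    assert(dma_addr >= dma_addressable_start);
    return dma_addr - dma_addressable_start;
}

int main(void)
{
    // Hypothetical window base and OS DMA address, for illustration only.
    assert(dma_to_gpu_addr(0x200000000ull, 0x240001000ull) == 0x40001000ull);
    return 0;
}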
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
static void *parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
{
NvU64 dma_addr;
void *cpu_addr;
cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, &dma_addr, gfp_flags);
if (!cpu_addr)
return cpu_addr;
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
atomic64_add(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
return cpu_addr;
}
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address)
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
{
void *cpu_addr = parent_gpu_dma_alloc_page(gpu->parent, gfp_flags, dma_address_out);
if (!cpu_addr)
return NV_ERR_NO_MEMORY;
// TODO: Bug 4868590: Issue GPA invalidate here
*cpu_addr_out = cpu_addr;
return NV_OK;
}
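A hedged usage sketch of the new child-GPU wrapper (the caller is hypothetical and error handling is abbreviated): allocate one coherent sysmem page, hand the GPU-visible address to the hardware, then free the page through the parent GPU that owns the mapping.

// Sketch only; assumes the UVM headers that declare these helpers.
static NV_STATUS dma_page_example(uvm_gpu_t *gpu)
{
    void *cpu_addr;
    NvU64 gpu_dma_addr;
    NV_STATUS status;

    status = uvm_gpu_dma_alloc_page(gpu, GFP_KERNEL, &cpu_addr, &gpu_dma_addr);
    if (status != NV_OK)
        return status;

    // ... fill the page through cpu_addr, hand gpu_dma_addr to the GPU ...

    // The mapping is owned by the parent, so it is released through it.
    uvm_parent_gpu_dma_free_page(gpu->parent, cpu_addr, gpu_dma_addr);

    return NV_OK;
}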
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
{
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, va, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, cpu_addr, dma_address);
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
struct page *page,
size_t size,
NvU64 *dma_address_out)
static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NvU64 dma_addr;
@@ -3694,11 +3693,20 @@ NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
}
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
return NV_OK;
}
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NV_STATUS status = parent_gpu_map_cpu_pages(gpu->parent, page, size, dma_address_out);
// TODO: Bug 4868590: Issue GPA invalidate here
return status;
}
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
{
UVM_ASSERT(PAGE_ALIGNED(size));


@@ -189,6 +189,9 @@ struct uvm_service_block_context_struct
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
// Access counters notification buffer index.
NvU32 access_counters_buffer_index;
};
typedef struct
@@ -197,8 +200,8 @@ typedef struct
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
// region of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@@ -350,7 +353,7 @@ typedef struct
// entries from the GPU buffer
NvU32 max_batch_size;
struct uvm_replayable_fault_buffer_info_struct
struct uvm_replayable_fault_buffer_struct
{
// Maximum number of faults entries that can be stored in the buffer
NvU32 max_faults;
@@ -414,7 +417,7 @@ typedef struct
uvm_ats_fault_invalidate_t ats_invalidate;
} replayable;
struct uvm_non_replayable_fault_buffer_info_struct
struct uvm_non_replayable_fault_buffer_struct
{
// Maximum number of faults entries that can be stored in the buffer
NvU32 max_faults;
@@ -468,7 +471,7 @@ typedef struct
// Timestamp when prefetch faults where disabled last time
NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
} uvm_fault_buffer_t;
struct uvm_access_counter_service_batch_context_struct
{
@@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_cached_notifications;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_access_counter_buffer_entry_t **notifications;
NvU32 num_notifications;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
} virt;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_reverse_map_t *translations;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Helper page mask to compute the accessed pages within a VA block
uvm_page_mask_t accessed_pages;
@@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 batch_id;
};
typedef struct
struct uvm_access_counter_buffer_struct
{
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
} rm;
uvm_parent_gpu_t *parent_gpu;
// The following values are precomputed by the access counter notification
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
// uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 translations_per_counter;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;
typedef struct
{
UvmGpuAccessCntrInfo rm_info;
// Access counters may have multiple notification buffers.
NvU32 index;
NvU32 max_notifications;
NvU32 max_batch_size;
@@ -551,19 +522,27 @@ typedef struct
// PCIe
NvU32 cached_put;
// Tracker used to aggregate access counters clear operations, needed for
// GPU removal
uvm_tracker_t clear_tracker;
// Current access counter configuration. During normal operation this
// information is computed once during GPU initialization. However, tests
// may override it to try different configuration values.
struct
{
uvm_gpu_access_counter_type_config_t mimc;
uvm_gpu_access_counter_type_config_t momc;
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
} rm;
NvU32 threshold;
// The following values are precomputed by the access counter
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
// in uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
NvU32 threshold;
} current_config;
// Access counter statistics
@@ -575,7 +554,7 @@ typedef struct
} stats;
// Ignoring access counters means that notifications are left in the HW
// buffer without being serviced. Requests to ignore access counters
// buffer without being serviced. Requests to ignore access counters
// are counted since the suspend path inhibits access counter interrupts,
// and the resume path needs to know whether to reenable them.
NvU32 notifications_ignored_count;
@@ -583,13 +562,25 @@ typedef struct
// Context structure used to service a GPU access counter batch
uvm_access_counter_service_batch_context_t batch_service_context;
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes
//
// Locking: both readers and writers must hold the access counters ISR lock
uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
struct
{
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes.
//
// Locking: both readers and writers must hold the access counters ISR
// lock.
uvm_va_space_t *reconfiguration_owner;
// The service access counters loop breaks after processing the first
// batch. It will be retriggered if there are pending notifications, but
// it releases the ISR service lock to check certain races that would be
// difficult to hit otherwise.
bool one_iteration_per_batch;
NvU32 sleep_per_iteration_us;
} test;
};
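The test knobs above only matter inside the notification service loop. A purely illustrative sketch of how such a loop could honor them (notifications_pending() and service_one_batch() are hypothetical stand-ins, not the actual servicing code):

// Illustrative only: notifications_pending() and service_one_batch() stand in
// for the real batch servicing helpers.
static void service_loop_sketch(uvm_access_counter_buffer_t *access_counters)
{
    while (notifications_pending(access_counters)) {
        service_one_batch(access_counters);

        if (access_counters->test.sleep_per_iteration_us)
            udelay(access_counters->test.sleep_per_iteration_us);

        // Tests force one batch per hold of the ISR service lock; the bottom
        // half is re-triggered if notifications are still pending.
        if (access_counters->test.one_iteration_per_batch)
            break;
    }
}

Breaking out after a single batch lets the service lock be dropped and re-taken between batches, which is how the races mentioned in the comment above are exercised.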
typedef struct
{
@@ -745,15 +736,11 @@ struct uvm_gpu_struct
struct
{
// Mask of peer_gpus set
// Mask of peer_gpus set.
uvm_processor_mask_t peer_gpu_mask;
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
// Leaf spinlock used to synchronize access to peer_gpu_mask.
uvm_spinlock_t peer_gpu_lock;
} peer_info;
// Maximum number of subcontexts supported
@@ -957,6 +944,16 @@ struct uvm_gpu_struct
uvm_mutex_t device_p2p_lock;
};
typedef struct
{
bool access_counters_alloc_buffer;
bool access_counters_alloc_block_context;
bool isr_access_counters_alloc;
bool isr_access_counters_alloc_stats_cpu;
bool access_counters_batch_context_notifications;
bool access_counters_batch_context_notification_cache;
} uvm_test_parent_gpu_inject_error_t;
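A test-only usage sketch for this error-injection struct (the caller is hypothetical; it assumes uvm_enable_builtin_tests is set and that the UUID and RM user object come from the test ioctl): set one flag and pass the struct to uvm_gpu_retain_by_uuid() so the corresponding allocation fails during GPU setup.

// Hypothetical test-only caller.
static NV_STATUS retain_gpu_with_injected_error(const NvProcessorUuid *gpu_uuid,
                                                const uvm_rm_user_object_t *user_rm_device,
                                                uvm_gpu_t **gpu_out)
{
    uvm_test_parent_gpu_inject_error_t error = {0};

    // Make the access counter buffer array allocation fail during
    // uvm_parent_gpu_init_isr().
    error.access_counters_alloc_buffer = true;

    return uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &error, gpu_out);
}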
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
@@ -965,8 +962,8 @@ struct uvm_gpu_struct
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// registered, but there are brief periods when they are not registered,
// such as during interrupt handling, and in add_gpu() or remove_gpu().
nv_kref_t gpu_kref;
@@ -976,7 +973,7 @@ struct uvm_parent_gpu_struct
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
@@ -1000,17 +997,6 @@ struct uvm_parent_gpu_struct
// nvUvmInterfaceUnregisterGpu()).
struct pci_dev *pci_dev;
// NVLINK Processing Unit (NPU) on PowerPC platforms. The NPU is a
// collection of CPU-side PCI devices which bridge GPU NVLINKs and the CPU
// memory bus.
//
// There is one PCI device per NVLINK. A set of NVLINKs connects to a single
// GPU, and all NVLINKs for a given socket are collected logically under
// this UVM NPU because some resources (such as register mappings) are
// shared by all those NVLINKs. This means multiple GPUs may connect to the
// same UVM NPU.
uvm_ibm_npu_t *npu;
// On kernels with NUMA support, this entry contains the closest CPU NUMA
// node to this GPU. Otherwise, the value will be -1.
int closest_cpu_numa_node;
@@ -1033,13 +1019,12 @@ struct uvm_parent_gpu_struct
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the physical address. The DMA mapping helpers like
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
// care of that.
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
NvU64 dma_addressable_start;
NvU64 dma_addressable_limit;
// Total size (in bytes) of physically mapped (with
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
// uvm_gpu_map_cpu_pages) sysmem pages, used for leak detection.
atomic64_t mapped_cpu_pages_size;
// Hardware Abstraction Layer
@@ -1079,9 +1064,9 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
@@ -1144,6 +1129,13 @@ struct uvm_parent_gpu_struct
// Indicates whether the GPU can map sysmem with pages larger than 4k
bool can_map_sysmem_with_large_pages;
// An integrated GPU has no vidmem and coherent access to sysmem. Note
// integrated GPUs have a write-back L2 cache (cf. discrete GPUs
// write-through cache.)
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
bool is_integrated_gpu;
struct
{
// If true, the granularity of key rotation is a single channel. If
@@ -1205,17 +1197,22 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
// Tracker used to aggregate access counters clear operations, needed for
// GPU removal. It is only used when supports_access_counters is set.
uvm_tracker_t access_counters_clear_tracker;
uvm_mutex_t access_counters_clear_tracker_lock;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1264,9 +1261,6 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;
// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;
struct
{
bool supported;
@@ -1348,8 +1342,12 @@ struct uvm_parent_gpu_struct
// GPUs.
NvU64 base_address;
} egm;
uvm_test_parent_gpu_inject_error_t test;
};
NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr);
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->name;
@@ -1395,10 +1393,10 @@ typedef struct
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
// GPU must be referred from the original GPU's peer_gpu_mask reference.
// The fields will not change while the lock is held, but they may no
// longer be valid because the other GPU might be in teardown.
// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1510,7 +1508,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock. This is a narrower-purpose
// that the caller is holding the global_lock. This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1521,6 +1519,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out);
// Retain a gpu which is known to already be retained. Does NOT require the
@@ -1578,10 +1577,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
@@ -1655,7 +1650,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
@@ -1684,24 +1680,25 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
// Map size bytes of contiguous sysmem on the GPU for physical access
// Map size bytes of contiguous sysmem on the GPU for physical access.
//
// size has to be aligned to PAGE_SIZE.
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// the GPU. This address is usable by any GPU under the same parent for the
// lifetime of that parent.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
}
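A usage sketch for the renamed mapping helpers (kernel context assumed; error handling abbreviated): map a freshly allocated struct page through a child GPU, then unmap it through the parent, which owns the DMA mapping.

// Sketch only; alloc_page()/__free_page() are the regular kernel page
// allocator, the uvm_* helpers are the ones declared in this header.
static NV_STATUS map_one_page_example(uvm_gpu_t *gpu)
{
    struct page *page = alloc_page(GFP_KERNEL);
    NvU64 gpu_dma_addr;
    NV_STATUS status;

    if (!page)
        return NV_ERR_NO_MEMORY;

    status = uvm_gpu_map_cpu_page(gpu, page, &gpu_dma_addr);
    if (status != NV_OK) {
        __free_page(page);
        return status;
    }

    // Any GPU under the same parent may use gpu_dma_addr for the lifetime of
    // that parent.

    uvm_parent_gpu_unmap_cpu_page(gpu->parent, gpu_dma_addr);
    __free_page(page);

    return NV_OK;
}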
static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
@@ -1712,16 +1709,15 @@ static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dm
// Allocate and map a page of system DMA memory on the GPU for physical access
//
// Returns
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter.
// - the address of allocated memory in CPU virtual address space.
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter. This address is usable by any GPU
// under the same parent for the lifetime of that parent.
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out);
// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// with uvm_gpu_dma_alloc_page().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address);
// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1742,8 +1738,6 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
bool uvm_platform_uses_canonical_form_address(void);
// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@@ -1786,7 +1780,7 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
}
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu);
// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU

File diff suppressed because it is too large


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,11 +27,11 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All parent_gpu's notifications buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);
// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);
// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_GPU_ACCESS_COUNTERS_H__
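The enable/disable contract described above is a first-enable/last-disable reference count. A standalone sketch of that pattern with simplified stand-in types (the hardware hooks are placeholders, and callers are assumed to serialize with the access counters ISR lock as the comments above require):

// Placeholder hardware hooks; the real code programs the notification buffer.
static void hw_enable_notifications(void)  { }
static void hw_disable_notifications(void) { }

struct ac_refcount {
    unsigned handling_ref_count;
};

// The first enable turns the hardware notifications on ...
static void ac_enable(struct ac_refcount *ac)
{
    if (ac->handling_ref_count++ == 0)
        hw_enable_notifications();
}

// ... and the last disable turns them off again.
static void ac_disable(struct ac_refcount *ac)
{
    if (--ac->handling_ref_count == 0)
        hw_disable_notifications();
}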


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
return 1;
}
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
// On Volta, accessCntrBufferCount is > 0, but we don't support access
// counters in UVM (access_counters_supported is cleared during HAL
// initialization.) This check prevents the top-half from accessing
// unallocated memory.
if (!parent_gpu->access_counters_supported)
return 0;
if (parent_gpu->isr.is_suspended)
return 0;
if (!parent_gpu->isr.access_counters.handling_ref_count)
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
return 0;
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
return 0;
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
return 0;
}
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
return 1;
}
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
// did, back to RM, via the return code:
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
// UVM is given a chance to handle the interrupt, before most of the RM
// processing. UVM communicates what it did, back to RM, via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
// UVM did not schedule a bottom half, because it was unable to get the
// locks it needed, but there is still UVM work to be done. RM will
// return "not handled" to the Linux kernel, *unless* RM handled other
// faults in its top half. In that case, the fact that UVM did not
// handle its interrupt is lost. However, life and interrupt processing
// continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining
// (GET != PUT in the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
// UVM did not find any work to do. Currently this is handled in RM in
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
// However, the extra precision is available for the future. RM's
// interrupt handling tends to evolve as new chips and new interrupts
// get created.
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status = NV_OK;
NvU32 i;
if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
// Now that we got a GPU object, lock it so that it can't be removed without
// us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
++parent_gpu->isr.interrupt_count;
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);
if (num_handlers_scheduled == 0) {
if (parent_gpu->isr.is_suspended)
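The return-code contract spelled out in the comment above can be condensed into a small sketch (the two parameters are hypothetical summaries of what the real top half computes; only the status mapping mirrors the comment):

// Hypothetical condensation of the contract: the real top half derives these
// inputs from how many bottom halves it scheduled and whether any buffer
// still has GET != PUT.
static NV_STATUS top_half_status_sketch(unsigned num_handlers_scheduled, bool work_pending)
{
    if (num_handlers_scheduled > 0)
        return NV_OK;                            // UVM handled an interrupt

    if (work_pending)
        return NV_WARN_MORE_PROCESSING_REQUIRED; // locks unavailable, GPU will re-interrupt

    return NV_ERR_NO_INTR_PENDING;               // no UVM work to do
}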
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_va_block_context_t *block_context;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
return status;
}
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
return NV_ERR_NO_MEMORY;
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;
parent_gpu->isr.replayable_faults.handling = true;
@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;
parent_gpu->isr.non_replayable_faults.handling = true;
@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
NvU32 notif_buf_index;
UVM_ASSERT(index_count > 0);
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
return NV_ERR_NO_MEMORY;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
if (!parent_gpu->isr.access_counters)
return NV_ERR_NO_MEMORY;
for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK)
return status;
}
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}
@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
NvU32 notif_buf_index;
if (parent_gpu->isr.access_counters) {
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
"notif buf index: %u\n",
notif_buf_index);
}
}
// Now that the GPU is safely out of the global table, lock the GPU and mark
// it as no longer handling interrupts so the top half knows not to schedule
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
// It is safe to deinitialize access counters even if they have not
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
if (parent_gpu->isr.access_counters)
uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
}
uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
}
if (parent_gpu->non_replayable_faults_supported) {
block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
}
block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
unsigned int cpu;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
// concurrently, but only one per-notification-buffer (i.e.,
// notif_buf_index) can be running this function for a given GPU since we
// enter with the per-notification-buffer access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
put_cpu();
uvm_parent_gpu_service_access_counters(parent_gpu);
uvm_service_access_counters(access_counters);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
uvm_access_counters_isr_unlock(access_counters);
uvm_parent_gpu_kref_put(parent_gpu);
}
@@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
// clear_replayable_faults is a no-op for architectures that don't
// support pulse-based interrupts.
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// This unlock call has to be out-of-order unlock due to interrupts_lock
@@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
{
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(access_counters);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
}
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_access_counters_intr_enable(access_counters);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get);
}
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);
// This unlock call has to be out-of-order unlock due to interrupts_lock
// still being held. Otherwise, it would result in a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
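
uvm_access_counters_isr_lock() disables the per-buffer interrupt while holding interrupts_lock and only then takes the per-buffer service semaphore; the unlock path re-enables the interrupt, clears any pending notification, and releases the semaphore before dropping the spinlock (the out-of-order up noted in the comment). The pthread sketch below models only that ordering for one buffer: the mutex and POSIX semaphore are stand-ins for the irqsave spinlock and uvm_down()/uvm_up_out_of_order(), and the ref-counted enable/disable helpers the real code calls are sketched separately after the intr_disable/intr_enable pair below. Compile with -pthread.

#include <pthread.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical state for one notification buffer. */
struct ac_buffer {
    pthread_mutex_t interrupts_lock;   /* stands in for the irqsave spinlock  */
    sem_t           service_lock;      /* stands in for the service semaphore */
    bool            intr_enabled;
};

static void ac_isr_lock(struct ac_buffer *b)
{
    /* Disable the interrupt first, under interrupts_lock, so the top half
     * cannot keep re-firing while we wait for the service semaphore. */
    pthread_mutex_lock(&b->interrupts_lock);
    b->intr_enabled = false;
    pthread_mutex_unlock(&b->interrupts_lock);

    sem_wait(&b->service_lock);
}

static void ac_isr_unlock(struct ac_buffer *b)
{
    pthread_mutex_lock(&b->interrupts_lock);
    b->intr_enabled = true;            /* plus clearing pending notifications */

    /* Release the semaphore while still holding interrupts_lock, mirroring
     * the out-of-order uvm_up_out_of_order() in the function above. */
    sem_post(&b->service_lock);
    pthread_mutex_unlock(&b->interrupts_lock);
}

int main(void)
{
    struct ac_buffer b = { .intr_enabled = true };
    pthread_mutex_init(&b.interrupts_lock, NULL);
    sem_init(&b.service_lock, 0, 1);

    ac_isr_lock(&b);
    printf("locked:   intr_enabled=%d\n", b.intr_enabled);
    ac_isr_unlock(&b);
    printf("unlocked: intr_enabled=%d\n", b.intr_enabled);
    return 0;
}
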
@@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
// The read of handling_ref_count could race with a write from
@@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
// ISR lock. But those functions are invoked with the interrupt disabled
// (disable_intr_ref_count > 0), so the check always returns false when the
// race occurs
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
}
++parent_gpu->isr.access_counters.disable_intr_ref_count;
++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
}
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);
--parent_gpu->isr.access_counters.disable_intr_ref_count;
--parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
}
}
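
The disable/enable pair above is reference counted: the hardware notification interrupt is only touched on the 0-to-1 and 1-to-0 transitions of disable_intr_ref_count, and only while the buffer is actually being handled (handling_ref_count > 0), which is what makes nested disables from different paths safe. A small standalone model of that state machine, with invented names:

#include <stdio.h>
#include <assert.h>
#include <stdbool.h>

/* Hypothetical per-buffer interrupt state. */
struct intr_state {
    int  handling_ref_count;      /* buffer is owned/handled by the driver */
    int  disable_intr_ref_count;  /* nested disable requests               */
    bool hw_intr_enabled;
};

static void intr_disable(struct intr_state *s)
{
    /* Touch the hardware only on the first disable request. */
    if (s->handling_ref_count > 0 && s->disable_intr_ref_count == 0)
        s->hw_intr_enabled = false;

    ++s->disable_intr_ref_count;
}

static void intr_enable(struct intr_state *s)
{
    assert(s->disable_intr_ref_count > 0);
    --s->disable_intr_ref_count;

    /* Re-enable only when the last outstanding disable goes away. */
    if (s->handling_ref_count > 0 && s->disable_intr_ref_count == 0)
        s->hw_intr_enabled = true;
}

int main(void)
{
    struct intr_state s = { .handling_ref_count = 1, .hw_intr_enabled = true };

    intr_disable(&s);   /* 0 -> 1: HW interrupt turned off */
    intr_disable(&s);   /* 1 -> 2: no HW access            */
    intr_enable(&s);    /* 2 -> 1: still off               */
    printf("after partial enable: %d\n", s.hw_intr_enabled);
    intr_enable(&s);    /* 1 -> 0: HW interrupt back on    */
    printf("after full enable:    %d\n", s.hw_intr_enabled);
    return 0;
}
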

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
// Number of the bottom-half invocations for this interrupt on a GPU
// over its lifetime.
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
// Protects the state of interrupts (enabled/disabled) and whether the GPU
// is currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
uvm_intr_handler_t *access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom-half to
// avoid deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
// Number of top-half ISRs called for this GPU over its lifetime.
NvU64 interrupt_count;
} uvm_isr_info_t;
@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -119,18 +119,18 @@
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
non_replayable_faults->shadow_buffer_copy =
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
if (!non_replayable_faults->shadow_buffer_copy)
return NV_ERR_NO_MEMORY;
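
max_faults above is simply the RM-reported buffer size divided by the per-architecture entry size returned by the fault-buffer HAL. A quick standalone check of that arithmetic with made-up numbers; the real sizes come from RM and the HAL, and the replayable path additionally asserts the divisibility shown here:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

int main(void)
{
    /* Hypothetical values: a 64 KiB buffer with 32-byte fault entries. */
    uint32_t buffer_size = 64 * 1024;
    uint32_t entry_size  = 32;

    /* Sanity check: the buffer must hold a whole number of entries. */
    assert(buffer_size % entry_size == 0);

    uint32_t max_faults = buffer_size / entry_size;
    printf("max_faults = %u\n", max_faults);   /* 2048 */
    return 0;
}
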
@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
if (non_replayable_faults->fault_cache) {
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
&has_pending_faults);
UVM_ASSERT(status == NV_OK);
@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
NV_STATUS status;
NvU32 i;
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
current_hw_entry,
cached_faults);
@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
uvm_gpu_t *gpu = user_channel->gpu;
NV_STATUS status;
uvm_push_t push;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
const uvm_va_policy_t *policy;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
service_context,
hmm_migratable));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
&va_block->tracker);
uvm_mutex_unlock(&va_block->lock);
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
{
uvm_va_space_t *va_space = fault_entry->va_space;
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@@ -588,7 +588,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
ats_invalidate->tlb_batch_pending = false;
va_range_next = uvm_va_space_iter_first(va_space, fault_entry->fault_address, ~0ULL);
va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_entry->fault_address);
// The VA isn't managed. See if ATS knows about it.
vma = find_vma_intersection(mm, fault_address, fault_address + 1);
@@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
struct mm_struct *mm;
uvm_gpu_va_space_t *gpu_va_space;
uvm_gpu_t *gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@@ -757,7 +757,7 @@ exit_no_channel:
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
{
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
&parent_gpu->fault_buffer.non_replayable.block_service_context;
NV_STATUS status;
bool hmm_migratable = true;
@@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
for (i = 0; i < cached_faults; ++i) {
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
if (status != NV_OK)
return;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
// the power management resume path.
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
// Read the current get/put pointers, as this might not be the first time
// we take control of the fault buffer since the GPU was initialized,
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
// (Re-)enable fault prefetching
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
else
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);
replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
// Check provided module parameter value
parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
replayable_faults->max_faults);
parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
replayable_faults->max_faults);
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer_info.max_batch_size);
if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer.max_batch_size);
}
batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
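
max_batch_size a few lines above is the uvm_perf_fault_batch_count module parameter clamped to [UVM_PERF_FAULT_BATCH_COUNT_MIN, max_faults], with the message printed only when the clamp actually changed the requested value. A small userspace model of that max()/min() clamp follows; the constant and printf are stand-ins for the real UVM definitions:

#include <stdio.h>

#define BATCH_COUNT_MIN 1u   /* stand-in for UVM_PERF_FAULT_BATCH_COUNT_MIN */

static unsigned clamp_batch_size(unsigned requested, unsigned max_faults)
{
    unsigned batch = requested;

    if (batch < BATCH_COUNT_MIN)
        batch = BATCH_COUNT_MIN;
    if (batch > max_faults)
        batch = max_faults;

    /* Warn only when the clamp changed the requested value. */
    if (batch != requested)
        printf("Invalid batch count %u. Valid range [%u:%u], using %u instead\n",
               requested, BATCH_COUNT_MIN, max_faults, batch);

    return batch;
}

int main(void)
{
    printf("%u\n", clamp_batch_size(256, 2048));  /* unchanged: 256  */
    printf("%u\n", clamp_batch_size(0, 2048));    /* clamped to 1    */
    printf("%u\n", clamp_batch_size(9999, 2048)); /* clamped to 2048 */
    return 0;
}
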
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
}
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
}
// Re-enable fault prefetching just in case it was disabled in a previous run
parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
fault_buffer_reinit_replayable_faults(parent_gpu);
@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
if (batch_context->fault_cache) {
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
uvm_tracker_deinit(&replayable_faults->replay_tracker);
}
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
// Re-enable prefetch faults in case we disabled them
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
}
@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->replayable_faults_supported);
status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
nvstatusToString(status),
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
// when it returns an error. Set the buffer handle to zero as it is
// used by the deinitialization logic to determine if it was correctly
// initialized.
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
goto fail;
}
@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
fault_buffer_deinit_replayable_faults(parent_gpu);
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
UVM_ASSERT(status == NV_OK);
uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
}
}
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
// Fast path 1: we left some faults unserviced in the buffer in the last pass
// Fast path 1: we left some faults unserviced in the buffer in the last
// pass
if (replayable_faults->cached_get != replayable_faults->cached_put)
return true;
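
The fast path works because the fault buffer is a ring: entries are pending exactly when the cached GET index (next entry the driver will read) differs from the cached PUT index (next entry the GPU will write). A tiny standalone model of that bookkeeping with an arbitrary ring size; the real indices are read from GPU registers through the fault-buffer HAL:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_ENTRIES 8u   /* arbitrary; the real value is max_faults */

struct ring {
    uint32_t get;   /* next entry the driver will consume */
    uint32_t put;   /* next entry the GPU will write      */
};

static bool ring_pending(const struct ring *r)
{
    return r->get != r->put;
}

static void ring_consume_one(struct ring *r)
{
    r->get = (r->get + 1) % RING_ENTRIES;
}

int main(void)
{
    struct ring r = { .get = 6, .put = 1 };   /* PUT has wrapped around */

    while (ring_pending(&r)) {
        printf("servicing entry %u\n", r.get);
        ring_consume_one(&r);
    }
    return 0;
}
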
@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;
UVM_ASSERT(tracker != NULL);
@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_gpu_phys_address_t pdb;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
NvU64 offset;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -452,7 +453,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
UVM_ASSERT(gpu_va_space);
pdb = uvm_page_tree_pdb(&gpu_va_space->page_tables)->addr;
pdb = uvm_page_tree_pdb_address(&gpu_va_space->page_tables);
// Record fatal fault event
uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, fault_entry, fault_entry->fatal_reason);
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_tracker_t *tracker = NULL;
if (batch_context)
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
return NV_OK;
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
NV_STATUS status;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault_cache;
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
@@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
// Parse until get != put and have enough space to cache.
while ((get != put) &&
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
bool is_same_instance_ptr = true;
uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
uvm_fault_utlb_info_t *current_tlb;
@@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_page_index_t last_page_index;
NvU32 page_fault_count = 0;
uvm_range_group_range_iter_t iter;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
NV_STATUS status;
uvm_va_block_retry_t va_block_retry;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
uvm_gpu_t *gpu = gpu_va_space->gpu;
bool replay_per_va_block =
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
UVM_ASSERT(vma);
@@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
// Do not check for coalesced access type. If there are multiple different
// accesses to an address, we can disregard the prefetch one.
// Do not check for coalesced access type. If there are multiple
// different accesses to an address, we can disregard the prefetch one.
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
@@ -1956,19 +1957,19 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
gpu->parent->fault_buffer.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;
(*block_faults) = 0;
va_range_next = uvm_va_space_iter_first(va_space, fault_address, ~0ULL);
va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_address);
if (va_range_next && (fault_address >= va_range_next->node.start)) {
UVM_ASSERT(fault_address < va_range_next->node.end);
va_range = va_range_next;
va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
va_range_next = uvm_va_range_gmmu_mappable_next(va_range);
}
if (va_range)
@@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
NvU64 outer = ~0ULL;
UVM_ASSERT(replay_per_va_block ==
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
// Limit outer to the minimum of next va_range.start and first
// fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
uvm_gpu_t *gpu = batch_context->fatal_gpu;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
UVM_ASSERT(va_space);
@@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
NvU32 block_faults;
const bool hmm_migratable = true;
@@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
NvU32 i;
uvm_va_space_t *va_space = NULL;
uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
struct mm_struct *mm = NULL;
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.replayable.block_service_context;
&parent_gpu->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
bool hmm_migratable = true;
@@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
// fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
// try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
NV_STATUS status;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
bool first = true;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
// 1) Disable prefetching to avoid new requests keep coming and flooding
// the buffer
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);
while (1) {
@@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
}
// 10) Re-enable prefetching
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);
if (status == NV_OK)
@@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
// comment in mark_fault_invalid_prefetch(..).
// Some tests rely on this logic (and ratio) to correctly disable prefetch
// fault reporting. If the logic changes, the tests will have to be changed.
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
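
Two pieces of arithmetic drive the hysteresis above: prefetch faults are disabled when more than two thirds of a batch were invalid prefetch faults (written as num_invalid * 3 > max_batch_size * 2 to avoid a division), and they are re-enabled once the configured lapse, given in milliseconds, has passed on the nanosecond timestamp scale. The full condition also requires that prefetch reporting is currently enabled and the lapse parameter is nonzero; the sketch below checks only the arithmetic, with made-up numbers:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* More than 2/3 of the batch were invalid prefetch faults, computed without
 * a division, exactly as in the condition above. */
static bool should_disable_prefetch(uint32_t num_invalid_prefetch, uint32_t max_batch_size)
{
    return num_invalid_prefetch * 3 > max_batch_size * 2;
}

/* The module parameter is in milliseconds; timestamps are in nanoseconds. */
static bool lapse_expired(uint64_t now_ns, uint64_t disabled_at_ns, uint64_t lapse_msec)
{
    return (now_ns - disabled_at_ns) > lapse_msec * 1000 * 1000;
}

int main(void)
{
    /* 180 of a 256-fault batch invalid: 540 > 512, so prefetch gets disabled. */
    printf("disable:   %d\n", should_disable_prefetch(180, 256));

    /* 300 ms module parameter, only 250 ms elapsed: not yet re-enabled. */
    printf("re-enable: %d\n", lapse_expired(250ull * 1000 * 1000, 0, 300));
    return 0;
}
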
@@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
NvU32 num_batches = 0;
NvU32 num_throttled = 0;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
parent_gpu->fault_buffer.prefetch_faults_enabled = true;
}
}
@@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
parent_gpu->fault_buffer.prefetch_faults_enabled = false;
parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
}
}

View File

@@ -792,7 +792,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
//
// Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
// have been added that are exactly what we need and could be slightly
// faster on arm and powerpc than the implementation below. But at least in
// faster on arm than the implementation below. But at least in
// 4.3 the implementation looks broken for arm32 (it maps directly to
// smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
// architectures) so instead of dealing with that just use a slightly bigger
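
The ordering the comment describes is the standard release/acquire pairing: the writer publishes the new completed value with a release store after its prior work, and readers use an acquire load so that seeing the value implies seeing that work. A minimal C11 sketch of that pairing on a 64-bit value, assuming a platform with native 64-bit atomics (which is exactly the arm32 caveat the comment is about):

#include <stdatomic.h>
#include <stdio.h>
#include <stdint.h>

/* Payload ordered by the completed value: written before the release store. */
static uint64_t payload;
static _Atomic uint64_t completed_value;

static void producer(uint64_t new_value)
{
    payload = new_value * 10;   /* work finished before publishing */
    atomic_store_explicit(&completed_value, new_value, memory_order_release);
}

static void consumer(void)
{
    uint64_t seen = atomic_load_explicit(&completed_value, memory_order_acquire);
    /* Acquire guarantees: if we observe the value, we also observe the payload. */
    printf("completed=%llu payload=%llu\n",
           (unsigned long long)seen, (unsigned long long)payload);
}

int main(void)
{
    producer(42);
    consumer();
    return 0;
}
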

View File

@@ -217,8 +217,8 @@ static uvm_hal_class_ops_t host_table[] =
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
},
@@ -254,9 +254,6 @@ static uvm_hal_class_ops_t host_table[] =
.replay_faults = uvm_hal_volta_replay_faults,
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
}
},
@@ -271,6 +268,9 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
.access_counter_query_clear_op = uvm_hal_turing_access_counter_query_clear_op,
}
},
{
@@ -310,12 +310,15 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
}
},
{
.id = BLACKWELL_CHANNEL_GPFIFO_B,
.parent_id = BLACKWELL_CHANNEL_GPFIFO_A,
.u.host_ops = {}
.u.host_ops = {
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb20x
}
},
};
@@ -409,6 +412,32 @@ static uvm_hal_class_ops_t arch_table[] =
},
};
// chip_table[] is different from the other class op tables - it is used to
// apply chip specific overrides to arch ops. This means unlike the other class
// op tables, parent_id does not refer to a preceding entry within the table
// itself. parent_id is an architecture (not a chip id) and instead refers to an
// entry in arch_table[]. This means that arch_table[] must be initialized
// before chip_table[]. chip_table[] must be initialized using
// ops_init_from_table(arch_table) instead of ops_init_from_parent().
// TODO: BUG 5044266: the chip ops should be separated from the arch ops.
static uvm_hal_class_ops_t chip_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
.u.arch_ops = {
.mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
.u.arch_ops = {
.mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
}
},
};
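
A chip_table[] id is the architecture value OR'd with the implementation value, and uvm_hal_init_gpu() later looks it up with gpuArch | gpuImplementation, keeping the plain arch ops when no chip-specific row matches. The standalone sketch below models that two-step selection; the numeric ids and strings are invented, not the real NV2080_CTRL_MC_ARCH_INFO_* values:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Invented stand-ins for the RM architecture/implementation defines. */
#define ARCH_GB100 0x1A0u
#define IMPL_GB10B 0x00Bu

struct ops_row {
    uint32_t    id;
    const char *arch_ops;   /* stands in for u.arch_ops */
};

static const struct ops_row arch_table_model[] = {
    { ARCH_GB100,              "blackwell arch ops" },
};

static const struct ops_row chip_table_model[] = {
    { ARCH_GB100 | IMPL_GB10B, "blackwell integrated overrides" },
};

static const struct ops_row *find_by_id(const struct ops_row *t, size_t n, uint32_t id)
{
    for (size_t i = 0; i < n; i++)
        if (t[i].id == id)
            return &t[i];
    return NULL;
}

int main(void)
{
    uint32_t gpu_arch = ARCH_GB100;
    uint32_t gpu_impl = IMPL_GB10B;

    /* Arch ops first, then the optional per-chip override. */
    const struct ops_row *ops  = find_by_id(arch_table_model, 1, gpu_arch);
    const struct ops_row *chip = find_by_id(chip_table_model, 1, gpu_arch | gpu_impl);
    if (chip)
        ops = chip;

    printf("selected: %s\n", ops->arch_ops);
    return 0;
}
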
static uvm_hal_class_ops_t fault_buffer_table[] =
{
{
@@ -537,22 +566,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
}
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
.parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
}
},
{
@@ -675,33 +701,35 @@ static inline void op_copy(uvm_hal_class_ops_t *dst, uvm_hal_class_ops_t *src, N
memcpy(m_dst, m_src, sizeof(void *));
}
static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
NvU32 row_count,
NvLength op_count,
NvLength op_offset)
static inline NV_STATUS ops_init_from_table(uvm_hal_class_ops_t *dest_table,
NvU32 dest_row_count,
uvm_hal_class_ops_t *src_table,
NvU32 src_row_count,
NvLength op_count,
NvLength op_offset)
{
NvLength i;
for (i = 0; i < row_count; i++) {
for (i = 0; i < dest_row_count; i++) {
NvLength j;
uvm_hal_class_ops_t *parent = NULL;
if (table[i].parent_id != 0) {
parent = ops_find_by_id(table, i, table[i].parent_id);
if (dest_table[i].parent_id != 0) {
parent = ops_find_by_id(src_table, src_row_count, dest_table[i].parent_id);
if (parent == NULL)
return NV_ERR_INVALID_CLASS;
// Go through all the ops and assign from parent's corresponding op
// if NULL
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
op_copy(table + i, parent, j, op_offset);
if (op_is_null(dest_table + i, j, op_offset))
op_copy(dest_table + i, parent, j, op_offset);
}
}
// At this point, it is an error to have missing HAL operations
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
if (op_is_null(dest_table + i, j, op_offset))
return NV_ERR_INVALID_STATE;
}
}
@@ -709,6 +737,19 @@ static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
return NV_OK;
}
static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
NvU32 row_count,
NvLength op_count,
NvLength op_offset)
{
return ops_init_from_table(table,
row_count,
table,
row_count,
op_count,
op_offset);
}
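
ops_init_from_table() walks each destination row, finds that row's parent in the source table, copies every op slot that is still NULL from the parent, and then fails if any op remains unset; ops_init_from_parent() is just the self-referential case. A compact standalone model of that NULL-filling inheritance, with the per-op function pointers reduced to an array of strings (the real code walks the ops by byte offset inside a union):

#include <stdio.h>
#include <stddef.h>

#define OP_COUNT 3

struct ops_row {
    int         id;
    int         parent_id;       /* 0 means "no parent" */
    const char *ops[OP_COUNT];   /* NULL slots inherit from the parent */
};

static struct ops_row *find_by_id(struct ops_row *t, size_t n, int id)
{
    for (size_t i = 0; i < n; i++)
        if (t[i].id == id)
            return &t[i];
    return NULL;
}

/* Fill NULL op slots in dest rows from their parent row in src. */
static int init_from_table(struct ops_row *dest, size_t dest_n,
                           struct ops_row *src, size_t src_n)
{
    for (size_t i = 0; i < dest_n; i++) {
        if (dest[i].parent_id != 0) {
            struct ops_row *parent = find_by_id(src, src_n, dest[i].parent_id);
            if (!parent)
                return -1;
            for (size_t j = 0; j < OP_COUNT; j++)
                if (!dest[i].ops[j])
                    dest[i].ops[j] = parent->ops[j];
        }
        /* At this point it is an error to have a missing op. */
        for (size_t j = 0; j < OP_COUNT; j++)
            if (!dest[i].ops[j])
                return -1;
    }
    return 0;
}

int main(void)
{
    struct ops_row table[] = {
        { 1, 0, { "op_a.v1", "op_b.v1", "op_c.v1" } },   /* base row       */
        { 2, 1, { NULL,      "op_b.v2", NULL      } },   /* overrides op_b */
    };

    /* Self-referential call, like ops_init_from_parent(). */
    if (init_from_table(table, 2, table, 2) == 0)
        printf("row 2: %s %s %s\n", table[1].ops[0], table[1].ops[1], table[1].ops[2]);
    return 0;
}
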
NV_STATUS uvm_hal_init_table(void)
{
NV_STATUS status;
@@ -737,6 +778,18 @@ NV_STATUS uvm_hal_init_table(void)
return status;
}
// chip_table[] must be initialized after arch_table[].
status = ops_init_from_table(chip_table,
ARRAY_SIZE(chip_table),
arch_table,
ARRAY_SIZE(arch_table),
ARCH_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.arch_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_table(chip_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(fault_buffer_table,
ARRAY_SIZE(fault_buffer_table),
FAULT_BUFFER_OP_COUNT,
@@ -802,6 +855,13 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
parent_gpu->arch_hal = &class_ops->u.arch_ops;
// Apply per chip overrides if required
class_ops = ops_find_by_id(chip_table,
ARRAY_SIZE(chip_table),
gpu_info->gpuArch | gpu_info->gpuImplementation);
if (class_ops)
parent_gpu->arch_hal = &class_ops->u.arch_ops;
class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n",
@@ -843,10 +903,14 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
// Computing.
//
// TODO: Bug 200692962: Add support for access counters in vGPU
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
}
// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
parent_gpu->access_counters_can_use_physical_addresses = false;
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -1042,36 +1106,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
}
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
{
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
switch (access_counter_type) {
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
}
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->ve_id);
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
UVM_DBG_PRINT(" bank %u\n", entry->bank);

View File

@@ -494,6 +494,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
@@ -686,54 +687,72 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
// Parse the entry on the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
typedef uvm_access_counter_clear_op_t
(*uvm_hal_access_counter_query_clear_op_t)(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
uvm_access_counter_clear_op_t
uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
uvm_access_counter_clear_op_t
uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
// The source and destination addresses must be 16-byte aligned. Note that the
// best performance is achieved with 256-byte alignment. The decrypt size must
@@ -786,8 +805,8 @@ struct uvm_host_hal_struct
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_access_counter_query_clear_op_t access_counter_query_clear_op;
uvm_hal_get_time_t get_time;
};
@@ -866,7 +885,8 @@ struct uvm_sec2_hal_struct
typedef struct
{
// id is either a hardware class or GPU architecture
// TODO: BUG 5044266: the chip ops should be separated from the arch ops.
// id is either a hardware class, a chip or a GPU architecture
NvU32 id;
NvU32 parent_id;
union
@@ -877,7 +897,7 @@ typedef struct
// ce_ops: id is a hardware class
uvm_ce_hal_t ce_ops;
// arch_ops: id is an architecture
// arch_ops: id is an architecture or a chip
uvm_arch_hal_t arch_ops;
// fault_buffer_ops: id is an architecture

View File

@@ -473,67 +473,39 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
typedef enum
{
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_ACCESS_COUNTER_TYPE_MOMC,
UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
typedef enum
{
    UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0,
    UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED,
    UVM_ACCESS_COUNTER_CLEAR_OP_ALL
} uvm_access_counter_clear_op_t;
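// A minimal sketch, assuming the HAL hooks declared in uvm_hal.h above and a
// hypothetical example_* helper, of how the clear-op query could pick between
// the targeted and bulk clear methods; the driver's real batching logic is
// not shown here.
static void example_clear_notified_counters(uvm_push_t *push,
                                            uvm_parent_gpu_t *parent_gpu,
                                            uvm_hal_access_counter_query_clear_op_t query_clear_op,
                                            uvm_hal_access_counter_clear_all_t clear_all,
                                            uvm_hal_access_counter_clear_targeted_t clear_targeted,
                                            uvm_access_counter_buffer_entry_t **entries,
                                            NvU32 num_entries)
{
    NvU32 i;
    uvm_access_counter_clear_op_t op = query_clear_op(parent_gpu, entries, num_entries);

    switch (op) {
        case UVM_ACCESS_COUNTER_CLEAR_OP_NONE:
            // Nothing to clear for this batch.
            break;
        case UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED:
            // Clear only the counters that produced notifications.
            for (i = 0; i < num_entries; i++)
                clear_targeted(push, entries[i]);
            break;
        case UVM_ACCESS_COUNTER_CLEAR_OP_ALL:
            // Cheaper to wipe every counter than to target each entry.
            clear_all(push);
            break;
    }
}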
struct uvm_access_counter_buffer_entry_struct
{
// Whether this counter refers to outbound accesses to remote GPUs or
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
uvm_access_counter_type_t counter_type;
// Address of the region for which a notification was sent
uvm_gpu_address_t address;
NvU64 address;
union
{
// These fields are only valid if address.is_virtual is true
struct
{
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
uvm_mmu_engine_type_t mmu_engine_type;
NvU32 mmu_engine_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
} virtual_info;
// These fields are only valid if address.is_virtual is false
struct
{
// Processor id where data is resident
//
// Although this information is not tied to a VA space, we can use
// a regular processor id because P2P is not allowed between
// partitioned GPUs.
uvm_processor_id_t resident_id;
} physical_info;
};
// This is the GPU that triggered the notification. Note that physical
// address based notifications are only supported on non-MIG-capable GPUs.
// This is the GPU that triggered the notification.
uvm_gpu_t *gpu;
// Number of times the tracked region was accessed since the last time it
// was cleared. Counter values saturate at the maximum value supported by
// the GPU (2^16 - 1 in Volta)
// the GPU (2^16 - 1 on Turing)
NvU32 counter_value;
// When the granularity of the tracked regions is greater than 64KB, the

View File

@@ -34,8 +34,9 @@ MODULE_PARM_DESC(uvm_disable_hmm,
"enabled if is not supported in this driver build "
"configuration, or if ATS settings conflict with HMM.");
#else
// So far, we've only tested HMM on x86_64, so disable it by default everywhere
// else.
// TODO: Bug 4103580: UVM: HMM: implement HMM support on ARM64 (aarch64)
// So far, we've only tested HMM on x86_64 and aarch64, and it is broken on
// aarch64, so disable it by default everywhere except x86_64.
static bool uvm_disable_hmm = true;
MODULE_PARM_DESC(uvm_disable_hmm,
"Force-disable HMM functionality in the UVM driver. "
@@ -186,7 +187,7 @@ static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_pa
if (status != NV_OK)
goto out;
status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
status = uvm_gpu_map_cpu_page(gpu, dst_page, &dma_addr);
if (status != NV_OK)
goto out_unmap_gpu;
@@ -1991,7 +1992,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,
dpage = pfn_to_page(pfn);
UVM_ASSERT(is_device_private_page(dpage));
UVM_ASSERT(dpage->pgmap->owner == &g_uvm_global);
UVM_ASSERT(page_pgmap(dpage)->owner == &g_uvm_global);
hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk);
UVM_ASSERT(!page_count(dpage));
@@ -2437,6 +2438,39 @@ static void hmm_release_atomic_pages(uvm_va_block_t *va_block,
}
}
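// The wrapper below papers over two kernel interfaces for making pages
// device-exclusive. When NV_IS_EXPORT_SYMBOL_PRESENT_make_device_exclusive is
// set, it calls the per-page make_device_exclusive() once per PAGE_SIZE step,
// unwinding any pages it already acquired (unlock_page()/put_page()) and
// returning the PTR_ERR() value if a later page fails; otherwise it simply
// forwards the call, and its return value, to the older
// make_device_exclusive_range() export.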
static int hmm_make_device_exclusive_range(struct mm_struct *mm,
unsigned long start,
unsigned long end,
struct page **pages)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT_make_device_exclusive
unsigned long addr;
int npages = 0;
for (addr = start; addr < end; addr += PAGE_SIZE) {
struct folio *folio;
struct page *page;
page = make_device_exclusive(mm, addr, &g_uvm_global, &folio);
if (IS_ERR(page)) {
while (npages) {
page = pages[--npages];
unlock_page(page);
put_page(page);
}
npages = PTR_ERR(page);
break;
}
pages[npages++] = page;
}
return npages;
#else
return make_device_exclusive_range(mm, start, end, pages, &g_uvm_global);
#endif
}
static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
@@ -2490,11 +2524,10 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
uvm_mutex_unlock(&va_block->lock);
npages = make_device_exclusive_range(service_context->block_context->mm,
npages = hmm_make_device_exclusive_range(service_context->block_context->mm,
uvm_va_block_cpu_page_address(va_block, region.first),
uvm_va_block_cpu_page_address(va_block, region.outer - 1) + PAGE_SIZE,
pages + region.first,
&g_uvm_global);
pages + region.first);
uvm_mutex_lock(&va_block->lock);

View File

@@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Hopper covers 64 PB and that's the minimum
@@ -97,10 +95,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -393,9 +393,13 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
if (uvm_gpu_address_is_peer(gpu, src)) {
UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
return false;
}
if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
src.address,
dst.address);

View File

@@ -212,7 +212,13 @@ static NvU64 make_pte_hopper(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
static NvU64 make_sked_reflected_pte_hopper(void)
{
// On discrete GPUs, SKED Reflected PTEs may use either the local aperture
// or the system non-coherent aperture. However, integrated GPUs may only
// use the system non-coherent aperture. We always use the system
// non-coherent aperture as that is common to both discrete and integrated
// GPUs.
return HWCONST64(_MMU_VER3, PTE, VALID, TRUE) |
HWCONST64(_MMU_VER3, PTE, APERTURE, SYSTEM_NON_COHERENT_MEMORY) |
HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(UVM_PROT_READ_WRITE_ATOMIC, UVM_MMU_PTE_FLAGS_NONE)) |
HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
}
@@ -323,11 +329,6 @@ static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir,
if (!g_uvm_global.ats.enabled)
return pcf[pde_type][ATS_ALLOWED];
// We assume all supported ATS platforms use canonical form address.
// See comments in uvm_gpu.c:uvm_gpu_can_address() and in
// uvm_mmu.c:page_tree_ats_init();
UVM_ASSERT(uvm_platform_uses_canonical_form_address());
// Hopper GPUs on ATS-enabled systems perform a parallel lookup on both
// ATS and GMMU page tables. For managed memory we need to prevent this
// parallel lookup since we would not get any GPU fault if the CPU has

View File

@@ -526,25 +526,6 @@ typedef struct
NV_STATUS rmStatus; // OUT
} UVM_MEM_MAP_PARAMS;
//
// UvmDebugAccessMemory
//
#define UVM_DEBUG_ACCESS_MEMORY UVM_IOCTL_BASE(36)
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU64 baseAddress NV_ALIGN_BYTES(8); // IN
NvU64 sizeInBytes NV_ALIGN_BYTES(8); // IN
NvU32 accessType; // IN (UvmDebugAccessType)
NvU64 buffer NV_ALIGN_BYTES(8); // IN/OUT
NvBool isBitmaskSet; // OUT
NvU64 bitmask NV_ALIGN_BYTES(8); // IN/OUT
NV_STATUS rmStatus; // OUT
} UVM_DEBUG_ACCESS_MEMORY_PARAMS;
//
// UvmRegisterGpu
//
@@ -1009,20 +990,35 @@ typedef struct
//
#define UVM_POPULATE_PAGEABLE UVM_IOCTL_BASE(71)
// Allow population of managed ranges.
//
// The UVM driver must have builtin tests enabled for the API to use the
// following two flags.
// Allow population of managed ranges. The goal is to validate that it is
// possible to populate pageable ranges backed by VMAs with the VM_MIXEDMAP or
// VM_DONTEXPAND special flags set. But since there is no portable way to force
// allocation of such memory from user space, and it is not safe to change the
// flags of an already-created VMA from kernel space, we take advantage of the
// fact that managed ranges have both special flags set at creation time (see
// uvm_mmap).
#define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED 0x00000001
// By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
// does not have read permission. This flag skips that check.
#define UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK 0x00000002
#define UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
// By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
// is VM_IO or VM_PFNMAP. This flag skips that check.
#define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL 0x00000004
// These flags are used internally within the driver and are not allowed from
// user space.
#define UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL
// These flags are allowed from user space only when builtin tests are enabled.
// Some of them may also be used internally within the driver in non-test use
// cases.
#define UVM_POPULATE_PAGEABLE_FLAGS_TEST (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)
#define UVM_POPULATE_PAGEABLE_FLAGS_ALL UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL
#define UVM_POPULATE_PAGEABLE_FLAGS_ALL (UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL | \
UVM_POPULATE_PAGEABLE_FLAGS_TEST)
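// A minimal sketch, assuming a hypothetical helper name and a
// builtin_tests_enabled parameter, of how an ioctl handler could screen
// user-supplied flags against the sets defined above; the driver's actual
// validation may differ.
static NvBool example_populate_pageable_flags_ok(NvU32 flags, NvBool builtin_tests_enabled)
{
    // Reject any bit outside the known flag set.
    if (flags & ~UVM_POPULATE_PAGEABLE_FLAGS_ALL)
        return NV_FALSE;

    // Internal-only flags are never accepted from user space.
    if (flags & UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL)
        return NV_FALSE;

    // Test flags are only accepted when builtin tests are enabled.
    if ((flags & UVM_POPULATE_PAGEABLE_FLAGS_TEST) && !builtin_tests_enabled)
        return NV_FALSE;

    return NV_TRUE;
}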
typedef struct
{
@@ -1142,7 +1138,6 @@ typedef struct
NV_STATUS rmStatus; // OUT
} UVM_IS_8_SUPPORTED_PARAMS;
#ifdef __cplusplus
}
#endif

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
return;
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
" insmod with uvm_leak_checker=2 for detailed information." :
"");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
if (g_uvm_global.unload_state.ptr)
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
UVM_INFO_PRINT(" Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
// Free so we don't keep eating up memory while debugging. Note that
// this also removes the entry from the table, frees info, and drops

Some files were not shown because too many files have changed in this diff.