Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git, synced 2026-01-27 11:39:46 +00:00

Compare commits: 570.124.04 ... 575.51.03 (5 commits)
| Author | SHA1 | Date |
|---|---|---|
| | e00332b05f | |
| | 4159579888 | |
| | e8113f665d | |
| | c5e439fea4 | |
| | 25bef4626e | |
@@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.04\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"575.51.03\"
 
 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -71,6 +71,31 @@ else
   CC ?= cc
   LD ?= ld
   OBJDUMP ?= objdump
+  AWK ?= awk
+  # Bake the following awk program in a string. The program is needed to add C++
+  # to the languages excluded from BTF generation.
+  #
+  # Also, unconditionally return success (0) from the awk program, rather than
+  # propagating pahole's return status (with 'exit system(pahole_cmd)'), to
+  # workaround an DW_TAG_rvalue_reference_type error in
+  # kernel/nvidia-modeset.ko.
+  #
+  # BEGIN {
+  #     pahole_cmd = "pahole"
+  #     for (i = 1; i < ARGC; i++) {
+  #         if (ARGV[i] ~ /--lang_exclude=/) {
+  #             pahole_cmd = pahole_cmd sprintf(" %s,c++", ARGV[i])
+  #         } else {
+  #             pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i])
+  #         }
+  #     }
+  #     system(pahole_cmd)
+  # }
+  PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); }
+
+  # If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and
+  # PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the
+  # empty string to PAHOLE_VARIABLES.
+  PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'")
 
   ifndef ARCH
     ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \
@@ -86,7 +111,7 @@ else
   ifneq ($(filter $(ARCH),i386 x86_64),)
     KERNEL_ARCH = x86
   else
-    ifeq ($(filter $(ARCH),arm64 powerpc),)
+    ifeq ($(filter $(ARCH),arm64 powerpc riscv),)
       $(error Unsupported architecture $(ARCH))
     endif
   endif
@@ -112,7 +137,8 @@ else
 
 .PHONY: modules module clean clean_conftest modules_install
 modules clean modules_install:
-	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" $(KBUILD_PARAMS) $@
+	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \
+	    $(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@
 	@if [ "$@" = "modules" ]; then \
 	  for module in $(NV_KERNEL_MODULES); do \
 	    if [ -x split-object-file.sh ]; then \
kernel-open/common/inc/dce_rm_client_ipc.h (new file, 35 lines)

@@ -0,0 +1,35 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _OS_DCE_CLIENT_IPC_H_
+#define _OS_DCE_CLIENT_IPC_H_
+
+// RM IPC Client Types
+
+#define DCE_CLIENT_RM_IPC_TYPE_SYNC   0x0
+#define DCE_CLIENT_RM_IPC_TYPE_EVENT  0x1
+#define DCE_CLIENT_RM_IPC_TYPE_MAX    0x2
+
+void dceclientHandleAsyncRpcCallback(NvU32 handle, NvU32 interfaceType,
+                                     NvU32 msgLength, void *data,
+                                     void *usrCtx);
+#endif
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,8 +36,7 @@
 #include "nv-timer.h"
 #include "nv-time.h"
 #include "nv-chardev-numbers.h"
 
-#define NV_KERNEL_NAME "Linux"
+#include "nv-platform.h"
 
 #ifndef AUTOCONF_INCLUDED
 #if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
@@ -239,7 +238,7 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
 #undef NV_SET_PAGES_UC_PRESENT
 #endif
 
-#if !defined(NVCPU_AARCH64) && !defined(NVCPU_PPC64LE) && !defined(NVCPU_RISCV64)
+#if !defined(NVCPU_AARCH64) && !defined(NVCPU_RISCV64)
 #if !defined(NV_SET_MEMORY_UC_PRESENT) && !defined(NV_SET_PAGES_UC_PRESENT)
 #error "This driver requires the ability to change memory types!"
 #endif
@@ -345,8 +344,6 @@ extern int nv_pat_mode;
 
 #define NV_PAGE_COUNT(page) \
     ((unsigned int)page_count(page))
-#define NV_GET_PAGE_COUNT(page_ptr) \
-    (NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)))
 #define NV_GET_PAGE_FLAGS(page_ptr) \
     (NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
 
@@ -405,7 +402,7 @@ typedef enum
     NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
 } nv_memory_type_t;
 
-#if defined(NVCPU_AARCH64) || defined(NVCPU_PPC64LE) || defined(NVCPU_RISCV64)
+#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
 #define NV_ALLOW_WRITE_COMBINING(mt)    1
 #elif defined(NVCPU_X86_64)
 #if defined(NV_ENABLE_PAT_SUPPORT)
@@ -463,10 +460,7 @@ static inline void *nv_vmalloc(unsigned long size)
 #else
     void *ptr = __vmalloc(size, GFP_KERNEL);
 #endif
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }
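This hunk (and the ones that follow) drops the `if (ptr)` guard in front of every `NV_MEMDBG_ADD()` call site, which is only safe if the bookkeeping path tolerates a NULL pointer itself. A minimal sketch of that idea, with hypothetical names (`nv_memdbg_add`, `nv_memdbg_stats` are stand-ins, not the driver's actual implementation):

```c
#include <stddef.h>

/* Hypothetical NULL-tolerant allocation tracker: because it is safe to
 * call with ptr == NULL, per-call-site "if (ptr)" guards become redundant,
 * which is the property this change relies on. */
struct nv_memdbg_stats {
    unsigned long failed_allocs;   /* calls where ptr was NULL */
    unsigned long tracked_bytes;
};

static struct nv_memdbg_stats g_stats;

static void nv_memdbg_add(void *ptr, size_t size)
{
    if (ptr == NULL) {
        g_stats.failed_allocs++;   /* record nothing else, but stay safe */
        return;
    }
    g_stats.tracked_bytes += size;
}
```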
@@ -483,10 +477,7 @@ static inline void *nv_ioremap(NvU64 phys, NvU64 size)
 #else
     void *ptr = ioremap(phys, size);
 #endif
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }
@@ -502,29 +493,12 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
     ptr = ioremap_cache_shared(phys, size);
 #elif defined(NV_IOREMAP_CACHE_PRESENT)
     ptr = ioremap_cache(phys, size);
-#elif defined(NVCPU_PPC64LE)
-    //
-    // ioremap_cache() has been only implemented correctly for ppc64le with
-    // commit f855b2f544d6 in April 2017 (kernel 4.12+). Internally, the kernel
-    // does provide a default implementation of ioremap_cache() that would be
-    // incorrect for our use (creating an uncached mapping) before the
-    // referenced commit, but that implementation is not exported and the
-    // NV_IOREMAP_CACHE_PRESENT conftest doesn't pick it up, and we end up in
-    // this #elif branch.
-    //
-    // At the same time, ppc64le have supported ioremap_prot() since May 2011
-    // (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
-    // support on power.
-    //
-    ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
 #else
     return nv_ioremap(phys, size);
 #endif
 
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
 
     return ptr;
 }
@@ -539,10 +513,8 @@ static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
     return nv_ioremap_nocache(phys, size);
 #endif
 
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
 
     return ptr;
 }
@@ -562,22 +534,19 @@ static NvBool nv_numa_node_has_memory(int node_id)
 #define NV_KMALLOC(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_KERNEL); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #define NV_KZALLOC(ptr, size) \
     { \
         (ptr) = kzalloc(size, NV_GFP_KERNEL); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #define NV_KMALLOC_ATOMIC(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_ATOMIC); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #if defined(__GFP_RETRY_MAYFAIL)
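Note that the allocation macros still yield NULL on failure; only the debug-tracking guard moved inside the macro. A hedged usage sketch (the wrapper function is hypothetical, not driver code):

```c
/* Hypothetical caller showing the NV_KMALLOC/NV_KFREE pattern: the macro
 * assigns through its first argument rather than returning a value, so
 * failure is still detected by testing the pointer afterwards. */
static NV_STATUS nv_alloc_work_buffer(NvU32 count, NvU8 **out)
{
    NvU8 *buf;

    NV_KMALLOC(buf, count * sizeof(*buf));
    if (buf == NULL)
        return NV_ERR_NO_MEMORY;

    *out = buf;
    return NV_OK;
    /* ... teardown elsewhere: NV_KFREE(buf, count * sizeof(*buf)); */
}
```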
@@ -591,8 +560,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
 #define NV_KMALLOC_NO_OOM(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_NO_OOM); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #define NV_KFREE(ptr, size) \
@@ -625,9 +593,9 @@ static inline pgprot_t nv_sme_clr(pgprot_t prot)
 #endif // __sme_clr
 }
 
-static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
+static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
 {
-    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
+    pgprot_t prot = __pgprot(pgprot_val(vm_prot));
 
 #if defined(pgprot_decrypted)
     return pgprot_decrypted(prot);
@@ -648,41 +616,6 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
 #endif
 #endif
 
-static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
-                             NvBool cached, NvBool unencrypted)
-{
-    void *ptr;
-    pgprot_t prot = PAGE_KERNEL;
-#if defined(NVCPU_X86_64)
-#if defined(PAGE_KERNEL_NOENC)
-    if (unencrypted)
-    {
-        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
-                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
-    }
-    else
-#endif
-    {
-        prot = cached ? PAGE_KERNEL : PAGE_KERNEL_NOCACHE;
-    }
-#elif defined(NVCPU_AARCH64)
-    prot = cached ? PAGE_KERNEL : NV_PGPROT_UNCACHED(PAGE_KERNEL);
-#endif
-    /* All memory cached in PPC64LE; can't honor 'cached' input. */
-    ptr = vmap(pages, page_count, VM_MAP, prot);
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
-    }
-    return (NvUPtr)ptr;
-}
-
-static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
-{
-    vunmap((void *)vaddr);
-    NV_MEMDBG_REMOVE((void *)vaddr, page_count * PAGE_SIZE);
-}
-
 #if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
 #define NV_NUM_PHYSPAGES get_num_physpages()
 #else
@@ -707,6 +640,47 @@ static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
 
 #define NV_NUM_CPUS() num_possible_cpus()
 
+#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
+
+#if defined(NVCPU_X86_64) && \
+    NV_IS_EXPORT_SYMBOL_GPL_set_memory_encrypted && \
+    NV_IS_EXPORT_SYMBOL_GPL_set_memory_decrypted
+#undef NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 1
+#endif
+
+static inline void nv_set_memory_decrypted_zeroed(NvBool unencrypted,
+                                                  unsigned long virt_addr,
+                                                  int num_native_pages,
+                                                  size_t size)
+{
+    if (virt_addr == 0)
+        return;
+
+#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+    if (unencrypted)
+    {
+        set_memory_decrypted(virt_addr, num_native_pages);
+        memset((void *)virt_addr, 0, size);
+    }
+#endif
+}
+
+static inline void nv_set_memory_encrypted(NvBool unencrypted,
+                                           unsigned long virt_addr,
+                                           int num_native_pages)
+{
+    if (virt_addr == 0)
+        return;
+
+#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+    if (unencrypted)
+    {
+        set_memory_encrypted(virt_addr, num_native_pages);
+    }
+#endif
+}
+
 static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 {
 #if defined(NV_PHYS_TO_DMA_PRESENT)
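A sketch of how helpers like these are typically paired in a memory-encryption (e.g. AMD SEV) flow; the wrapper below is illustrative only and not taken from the driver:

```c
/* Hypothetical flow: a buffer shared with the device is marked decrypted
 * (and zeroed, since its prior encrypted contents read back as garbage),
 * then re-encrypted before returning the pages to the kernel allocator. */
static int share_pages_with_device(unsigned long virt, int pages, size_t bytes)
{
    NvBool unencrypted = NV_TRUE;  /* caller decided this buffer is shared */

    nv_set_memory_decrypted_zeroed(unencrypted, virt, pages, bytes);

    /* ... hand the buffer to the device and do the work ... */

    nv_set_memory_encrypted(unencrypted, virt, pages);
    return 0;
}
```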
@@ -887,94 +861,42 @@ typedef void irqreturn_t;
      (((addr) >> NV_RM_PAGE_SHIFT) == \
          (((addr) + (size) - 1) >> NV_RM_PAGE_SHIFT)))
 
-/*
- * The kernel may have a workaround for this, by providing a method to isolate
- * a single 4K page in a given mapping.
- */
-#if (PAGE_SIZE > NV_RM_PAGE_SIZE) && defined(NVCPU_PPC64LE) && defined(NV_PAGE_4K_PFN)
-#define NV_4K_PAGE_ISOLATION_PRESENT
-#define NV_4K_PAGE_ISOLATION_MMAP_ADDR(addr)     \
-    ((NvP64)((void*)(((addr) >> NV_RM_PAGE_SHIFT) << PAGE_SHIFT)))
-#define NV_4K_PAGE_ISOLATION_MMAP_LEN(size)      PAGE_SIZE
-#define NV_4K_PAGE_ISOLATION_ACCESS_START(addr)  \
-    ((NvP64)((void*)((addr) & ~NV_RM_PAGE_MASK)))
-#define NV_4K_PAGE_ISOLATION_ACCESS_LEN(addr, size) \
-    ((((addr) & NV_RM_PAGE_MASK) + size + NV_RM_PAGE_MASK) & \
-     ~NV_RM_PAGE_MASK)
-#define NV_PROT_4K_PAGE_ISOLATION NV_PAGE_4K_PFN
-#endif
-
 static inline int nv_remap_page_range(struct vm_area_struct *vma,
     unsigned long virt_addr, NvU64 phys_addr, NvU64 size, pgprot_t prot)
 {
-    int ret = -1;
-
-#if defined(NV_4K_PAGE_ISOLATION_PRESENT) && defined(NV_PROT_4K_PAGE_ISOLATION)
-    if ((size == PAGE_SIZE) &&
-        ((pgprot_val(prot) & NV_PROT_4K_PAGE_ISOLATION) != 0))
-    {
-        /*
-         * remap_4k_pfn() hardcodes the length to a single OS page, and checks
-         * whether applying the page isolation workaround will cause PTE
-         * corruption (in which case it will fail, and this is an unsupported
-         * configuration).
-         */
-#if defined(NV_HASH__REMAP_4K_PFN_PRESENT)
-        ret = hash__remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
-#else
-        ret = remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
-#endif
-    }
-    else
-#endif
-    {
-        ret = remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
-                              prot);
-    }
-
-    return ret;
+    return remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
+                           prot);
 }
 
 static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
-    NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
+    NvU64 phys_addr, NvU64 size, NvU64 start)
 {
     int ret = -1;
 #if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
     ret = nv_remap_page_range(vma, start, phys_addr, size,
-        nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
+        nv_adjust_pgprot(vma->vm_page_prot));
 #else
     ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
-        size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
+        size, nv_adjust_pgprot(vma->vm_page_prot));
 #endif
     return ret;
 }
 
 static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
-    NvU64 virt_addr, NvU64 pfn, NvU32 extra_prot)
+    NvU64 virt_addr, NvU64 pfn)
 {
     /*
      * vm_insert_pfn{,_prot} replaced with vmf_insert_pfn{,_prot} in Linux 4.20
      */
 #if defined(NV_VMF_INSERT_PFN_PROT_PRESENT)
     return vmf_insert_pfn_prot(vma, virt_addr, pfn,
-        __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
+        __pgprot(pgprot_val(vma->vm_page_prot)));
 #else
     int ret = -EINVAL;
-    /*
-     * Only PPC64LE (NV_4K_PAGE_ISOLATION_PRESENT) requires extra_prot to be
-     * used when remapping.
-     *
-     * vm_insert_pfn_prot() was added in Linux 4.4, whereas POWER9 support
-     * was added in Linux 4.8.
-     *
-     * Rather than tampering with the vma to make use of extra_prot with
-     * vm_insert_pfn() on older kernels, for now, just fail in this case, as
-     * it's not expected to be used currently.
-     */
 #if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
     ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
-        __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
-#elif !defined(NV_4K_PAGE_ISOLATION_PRESENT)
+        __pgprot(pgprot_val(vma->vm_page_prot)));
+#else
     ret = vm_insert_pfn(vma, virt_addr, pfn);
 #endif
     switch (ret)
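With the PPC64LE 4K-page-isolation path gone, protection bits come solely from vma->vm_page_prot. A hedged sketch of a caller using the new four-argument signature (the handler and BAR offset are invented for illustration):

```c
/* Hypothetical mmap-handler fragment: the extra_prot argument no longer
 * exists, so any caching policy is folded into vm_page_prot up front. */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
    NvU64 phys = 0xF0000000;                    /* example BAR offset */
    NvU64 size = vma->vm_end - vma->vm_start;

    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

    if (nv_io_remap_page_range(vma, phys, size, vma->vm_start) != 0)
        return -EAGAIN;

    return 0;
}
```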
@@ -1160,11 +1082,6 @@ static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
 typedef struct nvidia_pte_s {
     NvU64           phys_addr;
     unsigned long   virt_addr;
-    NvU64           dma_addr;
 #ifdef CONFIG_XEN
     unsigned int    guest_pfn;
 #endif
     unsigned int    page_count;
 } nvidia_pte_t;
 
 #if defined(CONFIG_DMA_SHARED_BUFFER)
@@ -1205,6 +1122,7 @@ typedef struct nv_alloc_s {
     NvS32      node_id;             /* Node id for memory allocation when node is set in flags */
     void       *import_priv;
     struct sg_table *import_sgt;
+    dma_addr_t dma_handle;          /* dma handle used by dma_alloc_coherent(), dma_free_coherent() */
 } nv_alloc_t;
 
 /**
@@ -1430,6 +1348,23 @@ struct os_wait_queue {
     struct completion q;
 };
 
+/*!
+ * @brief Mapping between clock names and clock handles.
+ *
+ * TEGRA_DISP_WHICH_CLK_MAX: maximum number of clocks
+ * defined in below enum.
+ *
+ * arch/nvalloc/unix/include/nv.h
+ * enum TEGRASOC_WHICH_CLK_MAX;
+ *
+ */
+typedef struct nvsoc_clks_s {
+    struct {
+        struct clk *handles;
+        const char *clkName;
+    } clk[TEGRASOC_WHICH_CLK_MAX];
+} nvsoc_clks_t;
+
 /*
  * To report error in msi/msix when unhandled count reaches a threshold
  */
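A small sketch of walking the name/handle table added above; the lookup helper is illustrative, not part of the driver (in kernel code `strcmp` comes from `<linux/string.h>`):

```c
/* Hypothetical lookup over nvsoc_clks_t: each slot pairs a clock handle
 * with its name, indexed by the TEGRASOC_WHICH_CLK enum. */
static struct clk *find_clk_by_name(nvsoc_clks_t *clks, const char *name)
{
    int i;

    for (i = 0; i < TEGRASOC_WHICH_CLK_MAX; i++)
    {
        if (clks->clk[i].clkName != NULL &&
            strcmp(clks->clk[i].clkName, name) == 0)
        {
            return clks->clk[i].handles;
        }
    }
    return NULL;
}
```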
@@ -1589,6 +1524,8 @@ typedef struct nv_linux_state_s {
     nv_acpi_t* nv_acpi_object;
 #endif
 
+    nvsoc_clks_t soc_clk_handles;
+
     /* Lock serializing ISRs for different SOC vectors */
     nv_spinlock_t soc_isr_lock;
     void *soc_bh_mutex;
@@ -1788,12 +1725,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
  */
 static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
 {
-#if !defined(NVCPU_PPC64LE)
     if (NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv))
     {
         return NV_ERR_GPU_IS_LOST;
     }
-#endif
 
     return NV_OK;
 }
@@ -26,8 +26,7 @@
 
 #include "nv-linux.h"
 
-#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64) || \
-     defined(NVCPU_PPC64LE)) && \
+#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64)) && \
     (defined(CONFIG_PCI_MSI) || defined(CONFIG_PCI_USE_VECTOR))
 #define NV_LINUX_PCIE_MSI_SUPPORTED
 #endif
kernel-open/common/inc/nv-platform.h (new file, 36 lines)

@@ -0,0 +1,36 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NV_PLATFORM_H
+#define NV_PLATFORM_H
+
+#include "nv-linux.h"
+
+irqreturn_t nvidia_isr (int, void *);
+irqreturn_t nvidia_isr_kthread_bh (int, void *);
+
+#define NV_SUPPORTS_PLATFORM_DEVICE 0
+
+#define NV_SUPPORTS_PLATFORM_DISPLAY_DEVICE 0
+
+#endif
@@ -41,7 +41,7 @@ void nv_procfs_remove_gpu (nv_linux_state_t *);
 
 int nvidia_mmap (struct file *, struct vm_area_struct *);
 int nvidia_mmap_helper (nv_state_t *, nv_linux_file_private_t *, nvidia_stack_t *, struct vm_area_struct *, void *);
-int nv_encode_caching (pgprot_t *, NvU32, NvU32);
+int nv_encode_caching (pgprot_t *, NvU32, nv_memory_type_t);
 void nv_revoke_gpu_mappings_locked(nv_state_t *);
 
 NvUPtr nv_vm_map_pages (struct page **, NvU32, NvBool, NvBool);
@@ -168,6 +168,15 @@ typedef enum _TEGRASOC_WHICH_CLK
     TEGRASOC_WHICH_CLK_PLLA_DISP,
     TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
     TEGRASOC_WHICH_CLK_PLLA,
+    TEGRASOC_WHICH_CLK_EMC,
+    TEGRASOC_WHICH_CLK_GPU_FIRST,
+    TEGRASOC_WHICH_CLK_GPU_SYS = TEGRASOC_WHICH_CLK_GPU_FIRST,
+    TEGRASOC_WHICH_CLK_GPU_NVD,
+    TEGRASOC_WHICH_CLK_GPU_UPROC,
+    TEGRASOC_WHICH_CLK_GPU_GPC0,
+    TEGRASOC_WHICH_CLK_GPU_GPC1,
+    TEGRASOC_WHICH_CLK_GPU_GPC2,
+    TEGRASOC_WHICH_CLK_GPU_LAST = TEGRASOC_WHICH_CLK_GPU_GPC2,
     TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
 } TEGRASOC_WHICH_CLK;
@@ -283,7 +292,6 @@ typedef struct nv_usermap_access_params_s
     MemoryArea memArea;
     NvU64    access_start;
     NvU64    access_size;
-    NvU64    remap_prot_extra;
     NvBool   contig;
     NvU32    caching;
 } nv_usermap_access_params_t;
@@ -299,7 +307,6 @@ typedef struct nv_alloc_mapping_context_s {
     MemoryArea memArea;
     NvU64  access_start;
     NvU64  access_size;
-    NvU64  remap_prot_extra;
     NvU32  prot;
     NvBool valid;
     NvU32  caching;
@@ -498,6 +505,9 @@ typedef struct nv_state_t
         NvU32 dispIsoStreamId;
         NvU32 dispNisoStreamId;
     } iommus;
+
+    /* Console is managed by drm drivers or NVKMS */
+    NvBool client_managed_console;
 } nv_state_t;
 
 #define NVFP_TYPE_NONE       0x0
@@ -542,9 +552,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
 typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
 typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
 typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
-typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
-typedef struct UvmGpuInfo_tag nvgpuInfo_t;
-typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
+typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
+typedef struct UvmGpuInfo_tag nvgpuInfo_t;
+typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
 typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
 typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
 typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
|
||||
* flags
|
||||
*/
|
||||
|
||||
#define NV_FLAG_OPEN 0x0001
|
||||
#define NV_FLAG_EXCLUDE 0x0002
|
||||
#define NV_FLAG_CONTROL 0x0004
|
||||
// Unused 0x0008
|
||||
#define NV_FLAG_SOC_DISPLAY 0x0010
|
||||
#define NV_FLAG_USES_MSI 0x0020
|
||||
#define NV_FLAG_USES_MSIX 0x0040
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
#define NV_FLAG_OPEN 0x0001
|
||||
#define NV_FLAG_EXCLUDE 0x0002
|
||||
#define NV_FLAG_CONTROL 0x0004
|
||||
// Unused 0x0008
|
||||
#define NV_FLAG_SOC_DISPLAY 0x0010
|
||||
#define NV_FLAG_USES_MSI 0x0020
|
||||
#define NV_FLAG_USES_MSIX 0x0040
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
/* To be set when an FLR needs to be triggered after device shut down. */
|
||||
#define NV_FLAG_TRIGGER_FLR 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
// Unused 0x2000
|
||||
#define NV_FLAG_UNBIND_LOCK 0x4000
|
||||
#define NV_FLAG_TRIGGER_FLR 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
#define NV_FLAG_PCI_REMOVE_IN_PROGRESS 0x2000
|
||||
#define NV_FLAG_UNBIND_LOCK 0x4000
|
||||
/* To be set when GPU is not present on the bus, to help device teardown */
|
||||
#define NV_FLAG_IN_SURPRISE_REMOVAL 0x8000
|
||||
#define NV_FLAG_IN_SURPRISE_REMOVAL 0x8000
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@@ -795,7 +805,7 @@ NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, Nv
 NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
 NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
 
-NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
+NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **, NvBool);
 void NV_API_CALL nv_unregister_user_pages (nv_state_t *, NvU64, void **, void **);
 
 NV_STATUS NV_API_CALL nv_register_peer_io_mem (nv_state_t *, NvU64 *, NvU64, void **);
@@ -915,6 +925,15 @@ NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
 
 void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);
 
+NV_STATUS NV_API_CALL nv_clk_get_handles (nv_state_t *);
+void NV_API_CALL nv_clk_clear_handles (nv_state_t *);
+NV_STATUS NV_API_CALL nv_enable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
+void NV_API_CALL nv_disable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
+NV_STATUS NV_API_CALL nv_get_curr_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_get_max_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_get_min_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_set_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32);
+
 /*
  * ---------------------------------------------------------------------------
  *
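A hedged sketch of how these new Tegra clock entry points could be sequenced; the wrapper, clock choice, and the assumption that frequencies are in kHz are all illustrative, not taken from the driver:

```c
/* Hypothetical: acquire clock handles, enable one GPU clock, and pin it to
 * its maximum reported frequency (frequency units assumed here to be kHz). */
static NV_STATUS bump_gpu_sys_clk(nv_state_t *nv)
{
    NvU32 max_freq = 0;
    NV_STATUS status;

    status = nv_clk_get_handles(nv);
    if (status != NV_OK)
        return status;

    status = nv_enable_clk(nv, TEGRASOC_WHICH_CLK_GPU_SYS);
    if (status != NV_OK)
        return status;

    if (nv_get_max_freq(nv, TEGRASOC_WHICH_CLK_GPU_SYS, &max_freq) == NV_OK)
        status = nv_set_freq(nv, TEGRASOC_WHICH_CLK_GPU_SYS, max_freq);

    /* teardown elsewhere: nv_disable_clk(); nv_clk_clear_handles(); */
    return status;
}
```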
@@ -942,6 +961,7 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
 void   NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
 void   NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
 NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
+NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId);
 NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
 NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
 NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
@@ -1040,6 +1060,9 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
 
 NvBool NV_API_CALL rm_is_altstack_in_use(void);
 
+void NV_API_CALL rm_notify_gpu_addition(nvidia_stack_t *, nv_state_t *);
+void NV_API_CALL rm_notify_gpu_removal(nvidia_stack_t *, nv_state_t *);
+
 /* vGPU VFIO specific functions */
 NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
                                              NvU32 *, NvU32 *, NvU32);
@@ -1054,7 +1077,7 @@ NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *,
 NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
 NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
 
-NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
+NV_STATUS NV_API_CALL nv_check_usermap_access_params(nv_state_t*, const nv_usermap_access_params_t*);
 nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
 void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
kernel-open/common/inc/nv_common_utils.h (new file, 120 lines)

@@ -0,0 +1,120 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NV_COMMON_UTILS_H__
+#define __NV_COMMON_UTILS_H__
+
+#include "nvtypes.h"
+#include "nvmisc.h"
+
+#if !defined(TRUE)
+#define TRUE NV_TRUE
+#endif
+
+#if !defined(FALSE)
+#define FALSE NV_FALSE
+#endif
+
+#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)
+
+/* Get the length of a statically-sized array. */
+#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))
+
+#define NV_INVALID_HEAD 0xFFFFFFFF
+
+#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)
+
+#if !defined(NV_MIN)
+# define NV_MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+
+#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
+#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)
+
+#if !defined(NV_MAX)
+# define NV_MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
+#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)
+
+static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
+{
+    if (val < min) {
+        return min;
+    }
+    if (val > max) {
+        return max;
+    }
+    return val;
+}
+
+#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))
+
+/*
+ * Macros used for computing palette entries:
+ *
+ * NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
+ * of source_size bits into a value of target_size bits by shifting
+ * the source value into the high bits and replicating the high bits
+ * of the value into the low bits of the result.
+ *
+ * PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
+ * that has w bits to an appropriate entry in a LUT of 256 entries.
+ */
+static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
+                                              int source_size,
+                                              int result_size)
+{
+    return (val << (result_size - source_size)) |
+           (val >> ((source_size << 1) - result_size));
+}
+
+static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
+{
+    return NV_UNDER_REPLICATE(val, depth, 8);
+}
+
+/*
+ * Use __builtin_ffs where it is supported, or provide an equivalent
+ * implementation for platforms like riscv where it is not.
+ */
+#if defined(__GNUC__) && !NVCPU_IS_RISCV64
+static inline int nv_ffs(int x)
+{
+    return __builtin_ffs(x);
+}
+#else
+static inline int nv_ffs(int x)
+{
+    if (x == 0)
+        return 0;
+
+    LOWESTBITIDX_32(x);
+
+    return 1 + x;
+}
+#endif
+
+#endif /* __NV_COMMON_UTILS_H__ */
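A worked example for NV_UNDER_REPLICATE, assuming the header can be pulled into a userspace test harness: expanding the 5-bit component 0b10110 (22) to 8 bits replicates its top bits into the low bits, so 0b00000 maps to 0x00 and 0b11111 maps to 0xFF with no gap at either end of the range.

```c
#include <assert.h>

int main(void)
{
    /* (22 << (8 - 5)) | (22 >> ((5 << 1) - 8))
     *   = 0b10110000 | 0b101 = 0b10110101 = 181 */
    assert(NV_UNDER_REPLICATE(22, 5, 8) == 181);
    assert(NV_UNDER_REPLICATE(31, 5, 8) == 255); /* full scale stays full */
    assert(PALETTE_DEPTH_SHIFT(22, 5) == 181);   /* same mapping, 8-bit LUT */
    return 0;
}
```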
kernel-open/common/inc/nv_dpy_id.h (new file, 370 lines)

@@ -0,0 +1,370 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2010-2014 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * This header file defines the types NVDpyId and NVDpyIdList, as well
+ * as inline functions to manipulate these types. NVDpyId and
+ * NVDpyIdList should be treated as opaque by includers of this header
+ * file.
+ */
+
+#ifndef __NV_DPY_ID_H__
+#define __NV_DPY_ID_H__
+
+#include "nvtypes.h"
+#include "nvmisc.h"
+#include "nv_common_utils.h"
+#include <nvlimits.h> /* NV_MAX_SUBDEVICES */
+
+typedef struct {
+    NvU32 opaqueDpyId;
+} NVDpyId;
+
+typedef struct {
+    NvU32 opaqueDpyIdList;
+} NVDpyIdList;
+
+#define NV_DPY_ID_MAX_SUBDEVICES NV_MAX_SUBDEVICES
+#define NV_DPY_ID_MAX_DPYS_IN_LIST 32
+
+/*
+ * For use in combination with nvDpyIdToPrintFormat(); e.g.,
+ *
+ *   printf("dpy id: " NV_DPY_ID_PRINT_FORMAT "\n",
+ *          nvDpyIdToPrintFormat(dpyId));
+ *
+ * The includer should not make assumptions about the return type of
+ * nvDpyIdToPrintFormat().
+ */
+#define NV_DPY_ID_PRINT_FORMAT "0x%08x"
+
+/* functions to return an invalid DpyId and empty DpyIdList */
+
+static inline NVDpyId nvInvalidDpyId(void)
+{
+    NVDpyId dpyId = { 0 };
+    return dpyId;
+}
+
+static inline NVDpyIdList nvEmptyDpyIdList(void)
+{
+    NVDpyIdList dpyIdList = { 0 };
+    return dpyIdList;
+}
+
+static inline NVDpyIdList nvAllDpyIdList(void)
+{
+    NVDpyIdList dpyIdList = { ~0U };
+    return dpyIdList;
+}
+
+static inline void
+nvEmptyDpyIdListSubDeviceArray(NVDpyIdList dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
+{
+    int dispIndex;
+    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
+        dpyIdList[dispIndex] = nvEmptyDpyIdList();
+    }
+}
+
+/* set operations on DpyIds and DpyIdLists: Add, Subtract, Intersect, Xor */
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvAddDpyIdToDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList |
+                                   dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+/* Passing an invalid display ID makes this function return an empty list. */
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvAddDpyIdToEmptyDpyIdList(NVDpyId dpyId)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvAddDpyIdListToDpyIdList(NVDpyIdList dpyIdListA,
+                                      NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListB.opaqueDpyIdList |
+                                   dpyIdListA.opaqueDpyIdList;
+    return tmpDpyIdList;
+}
+
+/* Returns: dpyIdList - dpyId */
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvDpyIdListMinusDpyId(NVDpyIdList dpyIdList, NVDpyId dpyId)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
+                                   (~dpyId.opaqueDpyId);
+    return tmpDpyIdList;
+}
+
+/* Returns: dpyIdListA - dpyIdListB */
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvDpyIdListMinusDpyIdList(NVDpyIdList dpyIdListA,
+                                      NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
+                                   (~dpyIdListB.opaqueDpyIdList);
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvIntersectDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
+                                   dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvIntersectDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
+                                             NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
+                                   dpyIdListB.opaqueDpyIdList;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvXorDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList ^
+                                   dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvXorDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
+                                       NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList ^
+                                   dpyIdListB.opaqueDpyIdList;
+    return tmpDpyIdList;
+}
+
+/* boolean checks */
+
+static inline NvBool nvDpyIdIsInDpyIdList(NVDpyId dpyId,
+                                          NVDpyIdList dpyIdList)
+{
+    return !!(dpyIdList.opaqueDpyIdList & dpyId.opaqueDpyId);
+}
+
+static inline NvBool nvDpyIdIsInvalid(NVDpyId dpyId)
+{
+    return (dpyId.opaqueDpyId == 0);
+}
+
+static inline NvBool nvDpyIdListIsEmpty(NVDpyIdList dpyIdList)
+{
+    return (dpyIdList.opaqueDpyIdList == 0);
+}
+
+static inline NvBool
+nvDpyIdListSubDeviceArrayIsEmpty(NVDpyIdList
+                                 dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
+{
+    int dispIndex;
+    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
+        if (!nvDpyIdListIsEmpty(dpyIdList[dispIndex])) {
+            return NV_FALSE;
+        }
+    }
+    return NV_TRUE;
+}
+
+static inline NvBool nvDpyIdsAreEqual(NVDpyId dpyIdA, NVDpyId dpyIdB)
+{
+    return (dpyIdA.opaqueDpyId == dpyIdB.opaqueDpyId);
+}
+
+static inline NvBool nvDpyIdListsAreEqual(NVDpyIdList dpyIdListA,
+                                          NVDpyIdList dpyIdListB)
+{
+    return (dpyIdListA.opaqueDpyIdList == dpyIdListB.opaqueDpyIdList);
+}
+
+static inline NvBool nvDpyIdListIsASubSetofDpyIdList(NVDpyIdList dpyIdListA,
+                                                     NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList intersectedDpyIdList =
+        nvIntersectDpyIdListAndDpyIdList(dpyIdListA, dpyIdListB);
+
+    return nvDpyIdListsAreEqual(intersectedDpyIdList, dpyIdListA);
+}
+
+/*
+ * retrieve the individual dpyIds from dpyIdList; if dpyId is invalid,
+ * start at the beginning of the list; otherwise, start at the dpyId
+ * after the specified dpyId
+ */
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyId nvNextDpyIdInDpyIdListUnsorted(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    if (nvDpyIdIsInvalid(dpyId)) {
+        dpyId.opaqueDpyId = 1;
+    } else {
+        dpyId.opaqueDpyId <<= 1;
+    }
+
+    while (dpyId.opaqueDpyId) {
+
+        if (nvDpyIdIsInDpyIdList(dpyId, dpyIdList)) {
+            return dpyId;
+        }
+
+        dpyId.opaqueDpyId <<= 1;
+    }
+
+    /* no dpyIds left in dpyIdlist; return the invalid dpyId */
+
+    return nvInvalidDpyId();
+}
+
+#define FOR_ALL_DPY_IDS(_dpyId, _dpyIdList)                         \
+    for ((_dpyId) = nvNextDpyIdInDpyIdListUnsorted(nvInvalidDpyId(), \
+                                                   (_dpyIdList));    \
+         !nvDpyIdIsInvalid(_dpyId);                                  \
+         (_dpyId) = nvNextDpyIdInDpyIdListUnsorted((_dpyId),         \
+                                                   (_dpyIdList)))
+
+/* report how many dpyIds are in the dpyIdList */
+
+static inline int nvCountDpyIdsInDpyIdList(NVDpyIdList dpyIdList)
+{
+    return nvPopCount32(dpyIdList.opaqueDpyIdList);
+}
+
+static inline int
+nvCountDpyIdsInDpyIdListSubDeviceArray(NVDpyIdList
+                                       dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
+{
+    int dispIndex, n = 0;
+
+    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
+        n += nvCountDpyIdsInDpyIdList(dpyIdList[dispIndex]);
+    }
+
+    return n;
+}
+
+/* convert between dpyId/dpyIdList and NV-CONTROL values */
+
+static inline int nvDpyIdToNvControlVal(NVDpyId dpyId)
+{
+    return (int) dpyId.opaqueDpyId;
+}
+
+static inline int nvDpyIdListToNvControlVal(NVDpyIdList dpyIdList)
+{
+    return (int) dpyIdList.opaqueDpyIdList;
+}
+
+static inline NVDpyId nvNvControlValToDpyId(int val)
+{
+    NVDpyId dpyId;
+    dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
+    return dpyId;
+}
+
+static inline NVDpyIdList nvNvControlValToDpyIdList(int val)
+{
+    NVDpyIdList dpyIdList;
+    dpyIdList.opaqueDpyIdList = val;
+    return dpyIdList;
+}
+
+/* convert between dpyId and NvU32 */
+
+static inline NVDpyId nvNvU32ToDpyId(NvU32 val)
+{
+    NVDpyId dpyId;
+    dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
+    return dpyId;
+}
+
+static inline NVDpyIdList nvNvU32ToDpyIdList(NvU32 val)
+{
+    NVDpyIdList dpyIdList;
+    dpyIdList.opaqueDpyIdList = val;
+    return dpyIdList;
+}
+
+static inline NvU32 nvDpyIdToNvU32(NVDpyId dpyId)
+{
+    return dpyId.opaqueDpyId;
+}
+
+static inline NvU32 nvDpyIdListToNvU32(NVDpyIdList dpyIdList)
+{
+    return dpyIdList.opaqueDpyIdList;
+}
+
+/* Return the bit position of dpyId: a number in the range [0..31]. */
+static inline NvU32 nvDpyIdToIndex(NVDpyId dpyId)
+{
+    return nv_ffs(dpyId.opaqueDpyId) - 1;
+}
+
+/* Return a display ID that is not in the list passed in. */
+
+static inline NVDpyId nvNewDpyId(NVDpyIdList excludeList)
+{
+    NVDpyId dpyId;
+    if (~excludeList.opaqueDpyIdList == 0) {
+        return nvInvalidDpyId();
+    }
+    dpyId.opaqueDpyId =
+        1U << (nv_ffs(~excludeList.opaqueDpyIdList) - 1);
+    return dpyId;
+}
+
+/* See comment for NV_DPY_ID_PRINT_FORMAT. */
+static inline NvU32 nvDpyIdToPrintFormat(NVDpyId dpyId)
+{
+    return nvDpyIdToNvU32(dpyId);
+}
+
+/* Prevent usage of opaque values. */
+#define opaqueDpyId     __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
+#define opaqueDpyIdList __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
+
+#endif /* __NV_DPY_ID_H__ */
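An illustrative use of the dpy-id set API (values arbitrary, function invented for the example): build a list from two IDs, iterate it, and verify the population count.

```c
static int dpy_id_list_example(void)
{
    NVDpyId a = nvNvU32ToDpyId(0x4);   /* bit 2 */
    NVDpyId b = nvNvU32ToDpyId(0x10);  /* bit 4 */
    NVDpyIdList list = nvAddDpyIdToEmptyDpyIdList(a);
    NVDpyId iter;
    int seen = 0;

    list = nvAddDpyIdToDpyIdList(b, list);

    FOR_ALL_DPY_IDS(iter, list) {
        seen++;   /* visits each set bit exactly once */
    }

    return (seen == 2) && (nvCountDpyIdsInDpyIdList(list) == 2);
}
```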
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __NV_SMG_H__
|
||||
#define __NV_SMG_H__
|
||||
#ifndef __NV_MIG_TYPES_H__
|
||||
#define __NV_MIG_TYPES_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -29,25 +29,12 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/*
|
||||
* The simplest required abstraction for accessing RM independent of the
|
||||
* calling component which may be a kernel module or userspace driver.
|
||||
*/
|
||||
typedef NvU32 (*NVSubdevSMGRMControl) (void *ctx, NvU32 object, NvU32 cmd, void *params, NvU32 paramsSize);
|
||||
typedef NvU32 (*NVSubdevSMGRMAlloc) (void *ctx, NvU32 parent, NvU32 object, NvU32 cls, void *allocParams);
|
||||
typedef NvU32 (*NVSubdevSMGRMFree) (void *ctx, NvU32 parent, NvU32 object);
|
||||
typedef NvU32 MIGDeviceId;
|
||||
|
||||
NvBool NVSubdevSMGSetPartition(void *ctx,
|
||||
NvU32 subdevHandle,
|
||||
const char *computeInstUuid,
|
||||
NvU32 gpuInstSubscriptionHdl,
|
||||
NvU32 computeInstSubscriptionHdl,
|
||||
NVSubdevSMGRMControl rmControl,
|
||||
NVSubdevSMGRMAlloc rmAlloc,
|
||||
NVSubdevSMGRMFree rmFree);
|
||||
#define NO_MIG_DEVICE 0L
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __NV_SMG_H__ */
|
||||
#endif /* __NV_MIG_TYPES_H__ */
|
||||
@@ -660,14 +660,20 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
     RM will propagate the update to all channels using the provided VA space.
     All channels must be idle when this call is made.
 
+    If the pageDirectory is in system memory then a CPU physical address must be
+    provided. RM will establish and manage the DMA mapping for the
+    pageDirectory.
+
     Arguments:
         vaSpace[IN}          - VASpace Object
-        physAddress[IN]      - Physical address of new page directory
+        physAddress[IN]      - Physical address of new page directory. If
+                               !bVidMemAperture this is a CPU physical address.
         numEntries[IN]       - Number of entries including previous PDE which will be copied
         bVidMemAperture[IN]  - If set pageDirectory will reside in VidMem aperture else sysmem
         pasid[IN]            - PASID (Process Address Space IDentifier) of the process
                                corresponding to the VA space. Ignored unless the VA space
                                object has ATS enabled.
+        dmaAddress[OUT]      - DMA mapping created for physAddress.
 
     Error codes:
         NV_ERR_GENERIC
@@ -675,7 +681,8 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
 */
 NV_STATUS nvUvmInterfaceSetPageDirectory(uvmGpuAddressSpaceHandle vaSpace,
                                          NvU64 physAddress, unsigned numEntries,
-                                         NvBool bVidMemAperture, NvU32 pasid);
+                                         NvBool bVidMemAperture, NvU32 pasid,
+                                         NvU64 *dmaAddress);
 
 /*******************************************************************************
     nvUvmInterfaceUnsetPageDirectory
@@ -1056,7 +1063,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
 */
 NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
                                          UvmGpuAccessCntrInfo *pAccessCntrInfo,
-                                         UvmGpuAccessCntrConfig *pAccessCntrConfig);
+                                         const UvmGpuAccessCntrConfig *pAccessCntrConfig);
 
 /*******************************************************************************
     nvUvmInterfaceDisableAccessCntr
@@ -1862,5 +1869,4 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
 NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
                                          UvmCslOperation operation,
                                          NvU32 bufferSize);
-
 #endif // _NV_UVM_INTERFACE_H_
@@ -268,6 +268,7 @@ typedef struct UvmGpuChannelInfo_tag
 
     // The errorNotifier is filled out when the channel hits an RC error.
     NvNotification *errorNotifier;
+    NvNotification *keyRotationNotifier;
 
     NvU32 hwRunlistId;
@@ -297,6 +298,7 @@ typedef struct UvmGpuChannelInfo_tag
     NvU64 gpFifoGpuVa;
     NvU64 gpPutGpuVa;
     NvU64 gpGetGpuVa;
 
     // GPU VA of work submission offset is needed in Confidential Computing
     // so CE channels can ring doorbell of other channels as required for
     // WLC/LCIC work submission
@@ -374,6 +376,9 @@ typedef struct
     // True if the CE can be used for P2P transactions
     NvBool p2p:1;
 
+    // True if the CE supports encryption
+    NvBool secure:1;
+
     // Mask of physical CEs assigned to this LCE
     //
     // The value returned by RM for this field may change when a GPU is
@@ -1007,17 +1012,17 @@ typedef struct UvmGpuFaultInfo_tag
         NvU32  replayableFaultMask;
 
-        // Fault buffer CPU mapping
-        void*  bufferAddress;
+        //
+        // When Confidential Computing is disabled, the mapping points to the
+        // actual HW fault buffer.
+        //
+        // When Confidential Computing is enabled, the mapping points to a
+        // copy of the HW fault buffer. This "shadow buffer" is maintained
+        // by GSP-RM.
+        void*  bufferAddress;
 
         // Size, in bytes, of the fault buffer pointed by bufferAddress.
         NvU32  bufferSize;
 
         // Mapping pointing to the start of the fault buffer metadata containing
         // a 16Byte authentication tag and a valid byte. Always NULL when
         // Confidential Computing is disabled.
@@ -1103,24 +1108,9 @@ typedef enum
     UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
 } UVM_ACCESS_COUNTER_GRANULARITY;
 
-typedef enum
-{
-    UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
-    UVM_ACCESS_COUNTER_USE_LIMIT_QTR  = 2,
-    UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
-    UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
-} UVM_ACCESS_COUNTER_USE_LIMIT;
-
 typedef struct UvmGpuAccessCntrConfig_tag
 {
-    NvU32 mimcGranularity;
-
-    NvU32 momcGranularity;
-
-    NvU32 mimcUseLimit;
-
-    NvU32 momcUseLimit;
-
+    NvU32 granularity;
     NvU32 threshold;
 } UvmGpuAccessCntrConfig;
kernel-open/common/inc/nvi2c.h (new file, 37 lines)

@@ -0,0 +1,37 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _NV_I2C_H_
+#define _NV_I2C_H_
+
+#define NV_I2C_MSG_WR 0x0000
+#define NV_I2C_MSG_RD 0x0001
+
+typedef struct nv_i2c_msg_s
+{
+    NvU16 addr;
+    NvU16 flags;
+    NvU16 len;
+    NvU8* buf;
+} nv_i2c_msg_t;
+
+#endif
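An illustrative (non-driver) use of the message type: a register read on an I2C device is commonly expressed as a write of the register index followed by a read, using the flags defined above.

```c
/* Hypothetical helper: fill a write-then-read message pair for a register
 * read transaction on the device at dev_addr. */
static void build_reg_read(nv_i2c_msg_t msgs[2], NvU16 dev_addr,
                           NvU8 *reg, NvU8 *result, NvU16 result_len)
{
    msgs[0].addr  = dev_addr;
    msgs[0].flags = NV_I2C_MSG_WR;   /* write the register index */
    msgs[0].len   = 1;
    msgs[0].buf   = reg;

    msgs[1].addr  = dev_addr;
    msgs[1].flags = NV_I2C_MSG_RD;   /* then read back the value(s) */
    msgs[1].len   = result_len;
    msgs[1].buf   = result;
}
```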
96
kernel-open/common/inc/nvimpshared.h
Normal file
96
kernel-open/common/inc/nvimpshared.h
Normal file
@@ -0,0 +1,96 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/******************************************************************************\
*                                                                              *
* Description:                                                                 *
*   Accommodates sharing of IMP-related structures between kernel interface    *
*   files and core RM.                                                         *
*                                                                              *
\******************************************************************************/

#pragma once

#include <nvtypes.h>
#if defined(_MSC_VER)
#pragma warning(disable:4324)
#endif

//
// This file was generated with FINN, an NVIDIA coding tool.
// Source file: nvimpshared.finn
//

//
// There are only a small number of discrete dramclk frequencies available on
// the system. This structure contains IMP-relevant information associated
// with a specific dramclk frequency.
//
typedef struct DRAM_CLK_INSTANCE {
    NvU32 dram_clk_freq_khz;

    NvU32 mchub_clk_khz;

    NvU32 mc_clk_khz;

    NvU32 max_iso_bw_kbps;

    //
    // switch_latency_ns is the maximum time required to switch the dramclk
    // frequency to the frequency specified in dram_clk_freq_khz.
    //
    NvU32 switch_latency_ns;
} DRAM_CLK_INSTANCE;

//
// This table is used to collect information from other modules that is needed
// for RM IMP calculations. (Used on Tegra only.)
//
typedef struct TEGRA_IMP_IMPORT_DATA {
    //
    // max_iso_bw_kbps stores the maximum possible ISO bandwidth available to
    // display, assuming display is the only active ISO client. (Note that ISO
    // bandwidth will typically be allocated to multiple clients, so display
    // will generally not have access to the maximum possible bandwidth.)
    //
    NvU32 max_iso_bw_kbps;

    // On Orin, each dram channel is 16 bits wide.
    NvU32 num_dram_channels;

    //
    // dram_clk_instance stores entries for all possible dramclk frequencies,
    // sorted by dramclk frequency in increasing order.
    //
    // "24" is expected to be larger than the actual number of required entries
    // (which is provided by a BPMP API), but it can be increased if necessary.
    //
    // num_dram_clk_entries is filled in with the actual number of distinct
    // dramclk entries.
    //
    NvU32 num_dram_clk_entries;
    DRAM_CLK_INSTANCE dram_clk_instance[24];
} TEGRA_IMP_IMPORT_DATA;
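
Because the entries are sorted by rising dramclk frequency, a consumer can find the lowest frequency that still satisfies a bandwidth requirement with a single linear scan over num_dram_clk_entries. An illustrative helper, not code from this header:

/* Return the lowest dramclk (in kHz) whose ISO bandwidth meets req_bw_kbps,
 * or 0 if no entry qualifies. Illustrative only. */
static NvU32 pick_min_dram_clk_khz(const TEGRA_IMP_IMPORT_DATA *imp,
                                   NvU32 req_bw_kbps)
{
    NvU32 i;

    for (i = 0; i < imp->num_dram_clk_entries; i++) {
        /* Entries are sorted by increasing dram_clk_freq_khz. */
        if (imp->dram_clk_instance[i].max_iso_bw_kbps >= req_bw_kbps) {
            return imp->dram_clk_instance[i].dram_clk_freq_khz;
        }
    }
    return 0;
}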
@@ -640,22 +640,28 @@ enum NvKmsInputColorRange {
     * If DEFAULT is provided, driver will assume full range for RGB formats
     * and limited range for YUV formats.
     */
    NVKMS_INPUT_COLORRANGE_DEFAULT = 0,
    NVKMS_INPUT_COLOR_RANGE_DEFAULT = 0,

    NVKMS_INPUT_COLORRANGE_LIMITED = 1,
    NVKMS_INPUT_COLOR_RANGE_LIMITED = 1,

    NVKMS_INPUT_COLORRANGE_FULL = 2,
    NVKMS_INPUT_COLOR_RANGE_FULL = 2,
};

enum NvKmsInputColorSpace {
    /* Unknown colorspace; no de-gamma will be applied */
    NVKMS_INPUT_COLORSPACE_NONE = 0,
    /* Unknown colorspace */
    NVKMS_INPUT_COLOR_SPACE_NONE = 0,

    /* Linear, Rec.709 [-0.5, 7.5) */
    NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
    NVKMS_INPUT_COLOR_SPACE_BT601 = 1,
    NVKMS_INPUT_COLOR_SPACE_BT709 = 2,
    NVKMS_INPUT_COLOR_SPACE_BT2020 = 3,
    NVKMS_INPUT_COLOR_SPACE_BT2100 = NVKMS_INPUT_COLOR_SPACE_BT2020,

    /* PQ, Rec.2020 unity */
    NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
    NVKMS_INPUT_COLOR_SPACE_SCRGB = 4
};

enum NvKmsInputTf {
    NVKMS_INPUT_TF_LINEAR = 0,
    NVKMS_INPUT_TF_PQ = 1
};

enum NvKmsOutputColorimetry {

@@ -24,8 +24,10 @@
#if !defined(__NVKMS_KAPI_H__)

#include "nvtypes.h"
#include "nv_mig_types.h"

#include "nv-gpu-info.h"
#include "nv_dpy_id.h"
#include "nvkms-api-types.h"
#include "nvkms-format.h"

@@ -173,12 +175,18 @@ struct NvKmsKapiDeviceResourcesInfo {
        NvBool supportsSyncpts;

        NvBool requiresVrrSemaphores;

        NvBool supportsInputColorRange;
        NvBool supportsInputColorSpace;
    } caps;

    NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
    NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];

    struct NvKmsKapiLutCaps lutCaps;

    NvU64 vtFbBaseAddress;
    NvU64 vtFbSize;
};
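
The two new caps bits let a KAPI client decide at runtime whether the per-layer color controls added further down may be programmed. A hedged caller-side sketch, assuming resInfo was populated by the device-resources query:

    /* Sketch: gate color-property programming on the new caps bits. */
    if (resInfo.caps.supportsInputColorSpace &&
        resInfo.caps.supportsInputColorRange) {
        /* Safe to program layer inputColorSpace / inputColorRange. */
    }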

#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -204,6 +212,7 @@ struct NvKmsKapiConnectorInfo {
    NvU32 numIncompatibleConnectors;
    NvKmsKapiConnector incompatibleConnectorHandles[NVKMS_KAPI_MAX_CONNECTORS];

    NVDpyIdList dynamicDpyIdList;
};

struct NvKmsKapiStaticDisplayInfo {
@@ -222,6 +231,8 @@ struct NvKmsKapiStaticDisplayInfo {
    NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];

    NvU32 headMask;

    NvBool isDpMST;
};

struct NvKmsKapiSyncParams {
@@ -260,7 +271,8 @@ struct NvKmsKapiLayerConfig {
        NvBool enabled;
    } hdrMetadata;

    enum NvKmsOutputTf tf;
    enum NvKmsInputTf inputTf;
    enum NvKmsOutputTf outputTf;

    NvU8 minPresentInterval;
    NvBool tearing;
@@ -272,6 +284,7 @@ struct NvKmsKapiLayerConfig {
    NvU16 dstWidth, dstHeight;

    enum NvKmsInputColorSpace inputColorSpace;
    enum NvKmsInputColorRange inputColorRange;

    struct {
        NvBool enabled;
@@ -315,7 +328,10 @@ struct NvKmsKapiLayerRequestedConfig {
        NvBool dstXYChanged : 1;
        NvBool dstWHChanged : 1;
        NvBool cscChanged : 1;
        NvBool tfChanged : 1;
        NvBool inputTfChanged : 1;
        NvBool outputTfChanged : 1;
        NvBool inputColorSpaceChanged : 1;
        NvBool inputColorRangeChanged : 1;
        NvBool hdrMetadataChanged : 1;
        NvBool matrixOverridesChanged : 1;
        NvBool ilutChanged : 1;
@@ -481,6 +497,8 @@ struct NvKmsKapiEvent {
struct NvKmsKapiAllocateDeviceParams {
    /* [IN] GPU ID obtained from enumerateGpus() */
    NvU32 gpuId;
    /* [IN] MIG device if requested */
    MIGDeviceId migDevice;

    /* [IN] Private data of device allocator */
    void *privateData;
@@ -563,6 +581,11 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {

typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);

struct NvKmsKapiGpuInfo {
    nv_gpu_info_t gpuInfo;
    MIGDeviceId migDevice;
};

struct NvKmsKapiFunctionsTable {

    /*!
@@ -586,7 +609,7 @@ struct NvKmsKapiFunctionsTable {
     *
     * \return Count of enumerated gpus.
     */
    NvU32 (*enumerateGpus)(nv_gpu_info_t *gpuInfo);
    NvU32 (*enumerateGpus)(struct NvKmsKapiGpuInfo *kapiGpuInfo);

    /*!
     * Allocate an NVK device using which you can query/allocate resources on
@@ -1559,6 +1582,26 @@ struct NvKmsKapiFunctionsTable {
        NvS32 index
    );

    /*!
     * Check or wait on a head's LUT notifier.
     *
     * \param [in] device            A device allocated using allocateDevice().
     *
     * \param [in] head              The head to check for LUT completion.
     *
     * \param [in] waitForCompletion If true, wait for the notifier in NvKms
     *                               before returning.
     *
     * \param [out] complete         Returns whether the notifier has completed.
     */
    NvBool
    (*checkLutNotifier)
    (
        struct NvKmsKapiDevice *device,
        NvU32 head,
        NvBool waitForCompletion
    );

    /*
     * Notify NVKMS that the system's framebuffer console has been disabled and
     * the reserved allocation for the old framebuffer console can be unmapped.

@@ -701,11 +701,6 @@ nvPrevPow2_U64(const NvU64 x )
    } \
}

//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
 * Returns the position of nth set bit in the given mask.
 *
@@ -735,8 +730,6 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
    return -1;
}

#endif // NVDEC_1_0

//
// Size to use when declaring variable-sized arrays
//
@@ -780,12 +773,15 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
// Returns the offset (in bytes) of 'member' in struct 'type'.
#ifndef NV_OFFSETOF
#if defined(__GNUC__) && (__GNUC__ > 3)
#define NV_OFFSETOF(type, member)   ((NvU32)__builtin_offsetof(type, member))
#define NV_OFFSETOF(type, member)   ((NvUPtr) __builtin_offsetof(type, member))
#else
#define NV_OFFSETOF(type, member)   ((NvU32)(NvU64)&(((type *)0)->member)) // shouldn't we use PtrToUlong? But will need to include windows header.
#define NV_OFFSETOF(type, member)   ((NvUPtr) &(((type *)0)->member))
#endif
#endif

// Given a pointer and the member it is of the parent struct, return a pointer to the parent struct
#define NV_CONTAINEROF(ptr, type, member) ((type *) (((NvUPtr) ptr) - NV_OFFSETOF(type, member)))
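
NV_CONTAINEROF inverts NV_OFFSETOF: subtracting the member's byte offset from a member pointer recovers the enclosing object, the same pattern as the kernel's container_of(). A minimal illustrative use, with types invented for the example:

typedef struct
{
    NvU32 id;
    NvU32 payload;  /* suppose callers only hold a pointer to this member */
} example_item_t;

static example_item_t *item_from_payload(NvU32 *payload_ptr)
{
    /* Recover the enclosing example_item_t from a pointer to its member. */
    return NV_CONTAINEROF(payload_ptr, example_item_t, payload);
}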

//
// Performs a rounded division of b into a (unsigned). For SIGNED version of
// NV_ROUNDED_DIV() macro check the comments in bug 769777.

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -159,6 +159,11 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE,           0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE,     0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_THRESHOLD_CROSSED,               0x00000085, "A fatal threshold has been crossed")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR,       0x00000086, "An error occurred while trying to retire a resource")
NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC,        0x00000087, "NVLink fabric state cached by the driver is out of sync")
NV_STATUS_CODE(NV_ERR_BUFFER_FULL,                     0x00000088, "Buffer is full")
NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY,                    0x00000089, "Buffer is empty")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH,                     0x00010001, "WARNING Hot switch")
@@ -169,5 +174,6 @@ NV_STATUS_CODE(NV_WARN_MORE_PROCESSING_REQUIRED, 0x00010005, "WARNING More
NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO,                  0x00010006, "WARNING Nothing to do")
NV_STATUS_CODE(NV_WARN_NULL_OBJECT,                    0x00010007, "WARNING NULL object found")
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE,                   0x00010008, "WARNING value out of range")
NV_STATUS_CODE(NV_WARN_THRESHOLD_CROSSED,              0x00010009, "WARNING Threshold has been crossed")

#endif /* SDK_NVSTATUSCODES_H */

@@ -229,6 +229,7 @@ extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_sme_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;

387
kernel-open/common/inc/os_dsi_panel_props.h
Normal file
@@ -0,0 +1,387 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_DSI_PANEL_PARAMS_H_
#define _OS_DSI_PANEL_PARAMS_H_

#define DSI_GENERIC_LONG_WRITE            0x29
#define DSI_DCS_LONG_WRITE                0x39
#define DSI_GENERIC_SHORT_WRITE_1_PARAMS  0x13
#define DSI_GENERIC_SHORT_WRITE_2_PARAMS  0x23
#define DSI_DCS_WRITE_0_PARAM             0x05
#define DSI_DCS_WRITE_1_PARAM             0x15
#define DSI_DCS_READ_PARAM                0x06
#define DSI_DCS_COMPRESSION_MODE          0x07
#define DSI_DCS_PPS_LONG_WRITE            0x0A

#define DSI_DCS_SET_ADDR_MODE             0x36
#define DSI_DCS_EXIT_SLEEP_MODE           0x11
#define DSI_DCS_ENTER_SLEEP_MODE          0x10
#define DSI_DCS_SET_DISPLAY_ON            0x29
#define DSI_DCS_SET_DISPLAY_OFF           0x28
#define DSI_DCS_SET_TEARING_EFFECT_OFF    0x34
#define DSI_DCS_SET_TEARING_EFFECT_ON     0x35
#define DSI_DCS_NO_OP                     0x0
#define DSI_NULL_PKT_NO_DATA              0x9
#define DSI_BLANKING_PKT_NO_DATA          0x19
#define DSI_DCS_SET_COMPRESSION_METHOD    0xC0

/* DCS commands for command mode */
#define DSI_ENTER_PARTIAL_MODE            0x12
#define DSI_SET_PIXEL_FORMAT              0x3A
#define DSI_AREA_COLOR_MODE               0x4C
#define DSI_SET_PARTIAL_AREA              0x30
#define DSI_SET_PAGE_ADDRESS              0x2B
#define DSI_SET_ADDRESS_MODE              0x36
#define DSI_SET_COLUMN_ADDRESS            0x2A
#define DSI_WRITE_MEMORY_START            0x2C
#define DSI_WRITE_MEMORY_CONTINUE         0x3C

#define PKT_ID0(id)   ((((id) & 0x3f) << 3) | \
                       (((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN0(len) (((len) & 0x7) << 0)
#define PKT_ID1(id)   ((((id) & 0x3f) << 13) | \
                       (((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN1(len) (((len) & 0x7) << 10)
#define PKT_ID2(id)   ((((id) & 0x3f) << 23) | \
                       (((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN2(len) (((len) & 0x7) << 20)
#define PKT_ID3(id)   ((((id) & 0x3f) << 3) | \
                       (((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN3(len) (((len) & 0x7) << 0)
#define PKT_ID4(id)   ((((id) & 0x3f) << 13) | \
                       (((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN4(len) (((len) & 0x7) << 10)
#define PKT_ID5(id)   ((((id) & 0x3f) << 23) | \
                       (((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN5(len) (((len) & 0x7) << 20)
#define PKT_LP        (((DSI_ENABLE) & 0x1) << 30)
#define NUMOF_PKT_SEQ 12
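
Each PKT_* macro packs a packet id, an enable bit, and a payload-length field into slots of one 32-bit word of the 12-word packet sequence; the PKT_LP bit marks the word for low-power transmission. In code that includes this header (where DSI_ENABLE, defined further down, is visible), a sequence word is composed by OR-ing slot macros; illustrative values only:

/* Illustrative: one pkt_seq word carrying a null packet in slot 0 and a
 * blanking packet in slot 1, transmitted in low-power mode. */
NvU32 example_pkt_seq_word =
    PKT_ID0(DSI_NULL_PKT_NO_DATA)     | PKT_LEN0(0) |
    PKT_ID1(DSI_BLANKING_PKT_NO_DATA) | PKT_LEN1(2) |
    PKT_LP;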

/* DSI pixel data format, enum values should match with dt-bindings in tegra-panel.h */
typedef enum
{
    DSI_PIXEL_FORMAT_16BIT_P,
    DSI_PIXEL_FORMAT_18BIT_P,
    DSI_PIXEL_FORMAT_18BIT_NP,
    DSI_PIXEL_FORMAT_24BIT_P,
    DSI_PIXEL_FORMAT_8BIT_DSC,
    DSI_PIXEL_FORMAT_12BIT_DSC,
    DSI_PIXEL_FORMAT_16BIT_DSC,
    DSI_PIXEL_FORMAT_10BIT_DSC,
    DSI_PIXEL_FORMAT_30BIT_P,
    DSI_PIXEL_FORMAT_36BIT_P,
} DSIPIXELFORMAT;

/* DSI virtual channel number */
typedef enum
{
    DSI_VIRTUAL_CHANNEL_0,
    DSI_VIRTUAL_CHANNEL_1,
    DSI_VIRTUAL_CHANNEL_2,
    DSI_VIRTUAL_CHANNEL_3,
} DSIVIRTUALCHANNEL;

/* DSI transmit method for video data */
typedef enum
{
    DSI_VIDEO_TYPE_VIDEO_MODE,
    DSI_VIDEO_TYPE_COMMAND_MODE,
} DSIVIDEODATAMODE;

/* DSI HS clock mode */
typedef enum
{
    DSI_VIDEO_CLOCK_CONTINUOUS,
    DSI_VIDEO_CLOCK_TX_ONLY,
} DSICLOCKMODE;

/* DSI burst mode setting in video mode. Each mode is assigned with a
 * fixed value. The rationale behind this is to avoid change of these
 * values, since the calculation of dsi clock depends on them. */
typedef enum
{
    DSI_VIDEO_NON_BURST_MODE               = 0,
    DSI_VIDEO_NON_BURST_MODE_WITH_SYNC_END = 1,
    DSI_VIDEO_BURST_MODE_LOWEST_SPEED      = 2,
    DSI_VIDEO_BURST_MODE_LOW_SPEED         = 3,
    DSI_VIDEO_BURST_MODE_MEDIUM_SPEED      = 4,
    DSI_VIDEO_BURST_MODE_FAST_SPEED        = 5,
    DSI_VIDEO_BURST_MODE_FASTEST_SPEED     = 6,
} DSIVIDEOBURSTMODE;

/* DSI Ganged Mode */
typedef enum
{
    DSI_GANGED_SYMMETRIC_LEFT_RIGHT         = 1,
    DSI_GANGED_SYMMETRIC_EVEN_ODD           = 2,
    DSI_GANGED_SYMMETRIC_LEFT_RIGHT_OVERLAP = 3,
} DSIGANGEDTYPE;

typedef enum
{
    DSI_LINK0,
    DSI_LINK1,
} DSILINKNUM;

/* DSI Command Packet type */
typedef enum
{
    DSI_PACKET_CMD,
    DSI_DELAY_MS,
    DSI_GPIO_SET,
    DSI_SEND_FRAME,
    DSI_PACKET_VIDEO_VBLANK_CMD,
    DSI_DELAY_US,
} DSICMDPKTTYPE;

/* DSI Phy type */
typedef enum
{
    DSI_DPHY,
    DSI_CPHY,
} DSIPHYTYPE;

enum {
    DSI_GPIO_LCD_RESET,
    DSI_GPIO_PANEL_EN,
    DSI_GPIO_PANEL_EN_1,
    DSI_GPIO_BL_ENABLE,
    DSI_GPIO_BL_PWM,
    DSI_GPIO_AVDD_AVEE_EN,
    DSI_GPIO_VDD_1V8_LCD_EN,
    DSI_GPIO_TE,
    DSI_GPIO_BRIDGE_EN_0,
    DSI_GPIO_BRIDGE_EN_1,
    DSI_GPIO_BRIDGE_REFCLK_EN,
    DSI_N_GPIO_PANEL, /* add new gpio above this entry */
};

enum
{
    DSI_DISABLE,
    DSI_ENABLE,
};

typedef struct
{
    NvU8 cmd_type;
    NvU8 data_id;
    union
    {
        NvU16 data_len;
        NvU16 delay_ms;
        NvU16 delay_us;
        NvU32 gpio;
        NvU16 frame_cnt;
        struct
        {
            NvU8 data0;
            NvU8 data1;
        } sp;
    } sp_len_dly;
    NvU32 *pdata;
    NvU8 link_id;
    NvBool club_cmd;
} DSI_CMD, *PDSICMD;
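
A panel init sequence is typically expressed as an array of these descriptors, interleaving packets and delays. A hedged sketch of that pattern in code that includes this header; the initializer-macro helpers common in Tegra panel code are not defined here, so plain designated initializers are used:

/* Illustrative init sequence: exit sleep, wait 120 ms, display on. */
static DSI_CMD example_init_cmds[] = {
    { .cmd_type = DSI_PACKET_CMD, .data_id = DSI_DCS_WRITE_0_PARAM,
      .sp_len_dly = { .sp = { .data0 = DSI_DCS_EXIT_SLEEP_MODE, .data1 = 0 } } },
    { .cmd_type = DSI_DELAY_MS,
      .sp_len_dly = { .delay_ms = 120 } },
    { .cmd_type = DSI_PACKET_CMD, .data_id = DSI_DCS_WRITE_0_PARAM,
      .sp_len_dly = { .sp = { .data0 = DSI_DCS_SET_DISPLAY_ON, .data1 = 0 } } },
};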

typedef struct
{
    NvU16 t_hsdexit_ns;
    NvU16 t_hstrail_ns;
    NvU16 t_datzero_ns;
    NvU16 t_hsprepare_ns;
    NvU16 t_hsprebegin_ns;
    NvU16 t_hspost_ns;

    NvU16 t_clktrail_ns;
    NvU16 t_clkpost_ns;
    NvU16 t_clkzero_ns;
    NvU16 t_tlpx_ns;

    NvU16 t_clkprepare_ns;
    NvU16 t_clkpre_ns;
    NvU16 t_wakeup_ns;

    NvU16 t_taget_ns;
    NvU16 t_tasure_ns;
    NvU16 t_tago_ns;
} DSI_PHY_TIMING_IN_NS;

typedef struct
{
    NvU32 hActive;
    NvU32 vActive;
    NvU32 hFrontPorch;
    NvU32 vFrontPorch;
    NvU32 hBackPorch;
    NvU32 vBackPorch;
    NvU32 hSyncWidth;
    NvU32 vSyncWidth;
    NvU32 hPulsePolarity;
    NvU32 vPulsePolarity;
    NvU32 pixelClkRate;
} DSITIMINGS, *PDSITIMINGS;

typedef struct
{
    NvU8 n_data_lanes;     /* required */
    NvU8 pixel_format;     /* required */
    NvU8 refresh_rate;     /* required */
    NvU8 rated_refresh_rate;
    NvU8 panel_reset;      /* required */
    NvU8 virtual_channel;  /* required */
    NvU8 dsi_instance;
    NvU16 dsi_panel_rst_gpio;
    NvU16 dsi_panel_bl_en_gpio;
    NvU16 dsi_panel_bl_pwm_gpio;
    NvU16 even_odd_split_width;
    NvU8 controller_vs;

    NvBool panel_has_frame_buffer; /* required */

    /* Deprecated. Use DSI_SEND_FRAME panel command instead. */
    NvBool panel_send_dc_frames;

    DSI_CMD *dsi_init_cmd;         /* required */
    NvU16 n_init_cmd;              /* required */
    NvU32 *dsi_init_cmd_array;
    NvU32 init_cmd_array_size;
    NvBool sendInitCmdsEarly;

    DSI_CMD *dsi_early_suspend_cmd;
    NvU16 n_early_suspend_cmd;
    NvU32 *dsi_early_suspend_cmd_array;
    NvU32 early_suspend_cmd_array_size;

    DSI_CMD *dsi_late_resume_cmd;
    NvU16 n_late_resume_cmd;
    NvU32 *dsi_late_resume_cmd_array;
    NvU32 late_resume_cmd_array_size;

    DSI_CMD *dsi_postvideo_cmd;
    NvU16 n_postvideo_cmd;
    NvU32 *dsi_postvideo_cmd_array;
    NvU32 postvideo_cmd_array_size;

    DSI_CMD *dsi_suspend_cmd;      /* required */
    NvU16 n_suspend_cmd;           /* required */
    NvU32 *dsi_suspend_cmd_array;
    NvU32 suspend_cmd_array_size;

    NvU8 video_data_type;          /* required */
    NvU8 video_clock_mode;
    NvU8 video_burst_mode;
    NvU8 ganged_type;
    NvU16 ganged_overlap;
    NvBool ganged_swap_links;
    NvBool ganged_write_to_all_links;
    NvU8 split_link_type;

    NvU8 suspend_aggr;

    NvU16 panel_buffer_size_byte;
    NvU16 panel_reset_timeout_msec;

    NvBool hs_cmd_mode_supported;
    NvBool hs_cmd_mode_on_blank_supported;
    NvBool enable_hs_clock_on_lp_cmd_mode;
    NvBool no_pkt_seq_eot;         /* 1st generation panel may not
                                    * support eot. Don't set it for
                                    * most panels. */
    const NvU32 *pktSeq;
    NvU32 *pktSeq_array;
    NvU32 pktSeq_array_size;
    NvBool skip_dsi_pkt_header;
    NvBool power_saving_suspend;
    NvBool suspend_stop_stream_late;
    NvBool dsi2lvds_bridge_enable;
    NvBool dsi2edp_bridge_enable;

    NvU32 max_panel_freq_khz;
    NvU32 lp_cmd_mode_freq_khz;
    NvU32 lp_read_cmd_mode_freq_khz;
    NvU32 hs_clk_in_lp_cmd_mode_freq_khz;
    NvU32 burst_mode_freq_khz;
    NvU32 fpga_freq_khz;

    NvU32 te_gpio;
    NvBool te_polarity_low;
    NvBool dsiEnVRR;
    NvBool dsiVrrPanelSupportsTe;
    NvBool dsiForceSetTePin;

    int panel_gpio[DSI_N_GPIO_PANEL];
    NvBool panel_gpio_populated;

    NvU32 dpd_dsi_pads;

    DSI_PHY_TIMING_IN_NS phyTimingNs;

    NvU8 *bl_name;

    NvBool lp00_pre_panel_wakeup;
    NvBool ulpm_not_supported;
    NvBool use_video_host_fifo_for_cmd;
    NvBool dsi_csi_loopback;
    NvBool set_max_timeout;
    NvBool use_legacy_dphy_core;
    // Swap P/N pins polarity of all data lanes
    NvBool swap_data_lane_polarity;
    // Swap P/N pins polarity of clock lane
    NvBool swap_clock_lane_polarity;
    // Reverse clock polarity for partition A/B. 1st SOT bit goes on negedge of Clock lane
    NvBool reverse_clock_polarity;
    // DSI Lane Crossbar. Allocating xbar array for max number of lanes
    NvBool lane_xbar_exists;
    NvU32 lane_xbar_ctrl[8];
    NvU32 refresh_rate_adj;

    NvU8 dsiPhyType;
    NvBool en_data_scrambling;

    NvU32 dsipll_vco_rate_hz;
    NvU32 dsipll_clkoutpn_rate_hz;
    NvU32 dsipll_clkouta_rate_hz;
    NvU32 vpll0_rate_hz;

    DSITIMINGS dsiTimings;

    // DSC Parameters
    NvBool dsiDscEnable;
    NvU32 dsiDscBpp;
    NvU32 dsiDscNumSlices;
    NvU32 dsiDscSliceWidth;
    NvU32 dsiDscSliceHeight;
    NvBool dsiDscEnBlockPrediction;
    NvBool dsiDscEnDualDsc;
    NvU32 dsiDscDecoderMajorVersion;
    NvU32 dsiDscDecoderMinorVersion;
    NvBool dsiDscUseCustomPPS;
    NvU32 dsiDscCustomPPSData[32];

    // Driver allocates memory for PPS cmd to be sent to Panel
    NvBool ppsCmdMemAllocated;
} DSI_PANEL_INFO;

#endif
32
kernel-open/common/inc/os_gpio.h
Normal file
@@ -0,0 +1,32 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_GPIO_H_
#define _OS_GPIO_H_

typedef enum
{
    NV_OS_GPIO_FUNC_HOTPLUG_A,
    NV_OS_GPIO_FUNC_HOTPLUG_B,
} NV_OS_GPIO_FUNC_NAMES;

#endif
@@ -81,9 +81,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);

@@ -662,27 +662,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
        ;;

        hash__remap_4k_pfn)
            #
            # Determine if the hash__remap_4k_pfn() function is
            # present.
            #
            # Added by commit 6cc1a0ee4ce2 ("powerpc/mm/radix: Add radix
            # callback for pmd accessors") in v4.7 (committed 2016-04-29).
            # Present only in arch/powerpc
            #
            CODE="
            #if defined(NV_ASM_BOOK3S_64_HASH_64K_H_PRESENT)
            #include <linux/mm.h>
            #include <asm/book3s/64/hash-64k.h>
            #endif
            void conftest_hash__remap_4k_pfn(void) {
                hash__remap_4k_pfn();
            }"

            compile_check_conftest "$CODE" "NV_HASH__REMAP_4K_PFN_PRESENT" "" "functions"
        ;;

        register_cpu_notifier)
            #
            # Determine if register_cpu_notifier() is present
@@ -1633,7 +1612,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PHYS_TO_DMA_PRESENT" "" "functions"
        ;;


        dma_attr_macros)
            #
            # Determine if the NV_DMA_ATTR_SKIP_CPU_SYNC_PRESENT macro is present.
@@ -2441,6 +2419,45 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
        ;;

        drm_plane_create_color_properties)
            #
            # Determine if the function drm_plane_create_color_properties() is
            # present.
            #
            # Added by commit 80f690e9e3a6 ("drm: Add optional COLOR_ENCODING
            # and COLOR_RANGE properties to drm_plane") in v4.17 (2018-02-19).
            #
            CODE="
            #include <linux/types.h>
            #if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
            #include <drm/drm_color_mgmt.h>
            #endif
            void conftest_drm_plane_create_color_properties(void) {
                drm_plane_create_color_properties();
            }"

            compile_check_conftest "$CODE" "NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT" "" "functions"
        ;;

        drm_format_info_has_is_yuv)
            #
            # Determine if struct drm_format_info has .is_yuv member.
            #
            # Added by commit ce2d54619a10 ("drm/fourcc: Add is_yuv field to
            # drm_format_info to denote if format is yuv") in v4.19
            # (2018-07-17).
            #
            CODE="
            #if defined(NV_DRM_DRM_FOURCC_H_PRESENT)
            #include <drm/drm_fourcc.h>
            #endif
            int conftest_drm_format_info_has_is_yuv(void) {
                return offsetof(struct drm_format_info, is_yuv);
            }"

            compile_check_conftest "$CODE" "NV_DRM_FORMAT_INFO_HAS_IS_YUV" "" "types"
        ;;
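
Driver code can then branch on the generated define. A minimal sketch of that consumer side; the helper name is invented and the fallback list is deliberately incomplete:

/* Assumes <drm/drm_fourcc.h> is included. */
static bool example_format_is_yuv(u32 drm_fourcc)
{
#if defined(NV_DRM_FORMAT_INFO_HAS_IS_YUV)
    const struct drm_format_info *info = drm_format_info(drm_fourcc);

    return info != NULL && info->is_yuv;
#else
    /* Older kernels: fall back to an explicit (incomplete) list. */
    return drm_fourcc == DRM_FORMAT_NV12 || drm_fourcc == DRM_FORMAT_YUYV;
#endif
}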

        pci_stop_and_remove_bus_device)
            #
            # Determine if the pci_stop_and_remove_bus_device() function is present.
@@ -3519,60 +3536,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_VM_OPS_FAULT_REMOVED_VMA_ARG" "" "types"
        ;;

        pnv_npu2_init_context)
            #
            # Determine if the pnv_npu2_init_context() function is
            # present and the signature of its callback.
            #
            # Added by commit 1ab66d1fbada ("powerpc/powernv: Introduce
            # address translation services for Nvlink2") in v4.12
            # (2017-04-03).
            #
            echo "$CONFTEST_PREAMBLE
            #if defined(NV_ASM_POWERNV_H_PRESENT)
            #include <linux/pci.h>
            #include <asm/powernv.h>
            #endif
            void conftest_pnv_npu2_init_context(void) {
                pnv_npu2_init_context();
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c
            if [ -f conftest$$.o ]; then
                echo "#undef NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"
                echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
                rm -f conftest$$.o
                return
            fi

            echo "#define NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"

            # Check the callback signature
            echo "$CONFTEST_PREAMBLE
            #if defined(NV_ASM_POWERNV_H_PRESENT)
            #include <linux/pci.h>
            #include <asm/powernv.h>
            #endif

            struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                unsigned long flags,
                void (*cb)(struct npu_context *, void *),
                void *priv) {
                return NULL;
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c
            if [ -f conftest$$.o ]; then
                echo "#define NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
                rm -f conftest$$.o
                return
            fi

            echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
        ;;

        of_get_ibm_chip_id)
            #
            # Determine if the of_get_ibm_chip_id() function is present.
@@ -5289,6 +5252,45 @@ compile_test() {

            compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
        ;;

        follow_pte_arg_vma)
            #
            # Determine if the first argument of follow_pte is
            # mm_struct or vm_area_struct.
            #
            # The first argument was changed from mm_struct to vm_area_struct by
            # commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
            #
            CODE="
            #include <linux/mm.h>

            typeof(follow_pte) conftest_follow_pte_has_vma_arg;
            int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
                                                unsigned long address,
                                                pte_t **ptep,
                                                spinlock_t **ptl) {
                return 0;
            }"

            compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
        ;;

        ptep_get)
            #
            # Determine if ptep_get() is present.
            #
            # ptep_get() was added by commit 481e980a7c19
            # ("mm: Allow arches to provide ptep_get()")
            #
            CODE="
            #include <linux/mm.h>
            void conftest_ptep_get(void) {
                ptep_get();
            }"

            compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
        ;;

        drm_plane_atomic_check_has_atomic_state_arg)
            #
            # Determine if drm_plane_helper_funcs::atomic_check takes 'state'
@@ -5478,6 +5480,31 @@ compile_test() {
            fi
        ;;

        of_property_for_each_u32_has_internal_args)
            #
            # Determine if the internal arguments for the macro
            # of_property_for_each_u32() are present.
            #
            # Commit 9722c3b66e21 ("of: remove internal arguments from
            # of_property_for_each_u32()") removes two arguments from
            # of_property_for_each_u32() which are used internally within
            # the macro and so do not need to be passed. This change was
            # made for Linux v6.11.
            #
            CODE="
            #include <linux/of.h>
            void conftest_of_property_for_each_u32(struct device_node *np,
                                                   char *propname) {
                struct property *iparam1;
                const __be32 *iparam2;
                u32 val;

                of_property_for_each_u32(np, propname, iparam1, iparam2, val);
            }"

            compile_check_conftest "$CODE" "NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS" "" "types"
        ;;
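
A driver iterating a u32 property can then select the matching call form at build time. A hedged sketch of that dual-form call site; the device node and property name are placeholders:

/* Assumes <linux/of.h> is included. */
static u32 example_sum_u32_prop(struct device_node *np)
{
    u32 val, sum = 0;
#if defined(NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS)
    struct property *prop;
    const __be32 *cur;

    of_property_for_each_u32(np, "example-prop", prop, cur, val)
        sum += val;
#else
    of_property_for_each_u32(np, "example-prop", val)
        sum += val;
#endif
    return sum;
}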

        of_property_read_variable_u8_array)
            #
            # Determine if of_property_read_variable_u8_array is present
@@ -5574,8 +5601,8 @@ compile_test() {

        of_dma_configure)
            #
            # Determine if of_dma_configure() function is present, and how
            # many arguments it takes.
            # Determine if of_dma_configure() function is present, if it
            # returns int, and how many arguments it takes.
            #
            # Added by commit 591c1ee465ce ("of: configure the platform
            # device dma parameters") in v3.16. However, it was a static,
@@ -5585,6 +5612,10 @@ compile_test() {
            # commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
            # to help re-use") in v4.1.
            #
            # Its return type was changed from void to int by commit
            # 7b07cbefb68d ("iommu: of: Handle IOMMU lookup failure with
            # deferred probing or error") in v4.12.
            #
            # It subsequently began taking a third parameter with commit
            # 3d6ce86ee794 ("drivers: remove force dma flag from buses")
            # in v4.18.
@@ -5609,6 +5640,7 @@ compile_test() {

                echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
                echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
                echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
            else
                echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"

@@ -5627,6 +5659,26 @@ compile_test() {
                if [ -f conftest$$.o ]; then
                    rm -f conftest$$.o
                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"

                    echo "$CONFTEST_PREAMBLE
                    #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
                    #include <linux/of_device.h>
                    #endif

                    int conftest_of_dma_configure_has_int_return_type(void) {
                        return of_dma_configure(NULL, NULL, false);
                    }" > conftest$$.c

                    $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
                    rm -f conftest$$.c

                    if [ -f conftest$$.o ]; then
                        rm -f conftest$$.o
                        echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    else
                        echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    fi

                    return
                fi

@@ -5645,6 +5697,26 @@ compile_test() {
                if [ -f conftest$$.o ]; then
                    rm -f conftest$$.o
                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"

                    echo "$CONFTEST_PREAMBLE
                    #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
                    #include <linux/of_device.h>
                    #endif

                    int conftest_of_dma_configure_has_int_return_type(void) {
                        return of_dma_configure(NULL, NULL);
                    }" > conftest$$.c

                    $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
                    rm -f conftest$$.c

                    if [ -f conftest$$.o ]; then
                        rm -f conftest$$.o
                        echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    else
                        echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    fi

                    return
                fi
            fi
fi
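
On the driver side, the three generated symbols combine into one call-site wrapper. A hedged sketch under those assumptions; the wrapper name is invented for illustration:

static int nv_of_dma_configure_sketch(struct device *dev,
                                      struct device_node *np)
{
    int ret = 0;
#if defined(NV_OF_DMA_CONFIGURE_PRESENT)
#if NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT == 3
#if defined(NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE)
    ret = of_dma_configure(dev, np, true);
#else
    of_dma_configure(dev, np, true);
#endif
#elif NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT == 2
#if defined(NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE)
    ret = of_dma_configure(dev, np);
#else
    of_dma_configure(dev, np);
#endif
#endif
#endif
    return ret;
}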

@@ -7523,6 +7595,34 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
        ;;

        platform_driver_struct_remove_returns_void)
            #
            # Determine if the 'platform_driver' structure 'remove' function
            # pointer returns void.
            #
            # Commit 0edb555a65d1 ("platform: Make platform_driver::remove()
            # return void") updated the platform_driver structure 'remove'
            # callback to return void instead of int in Linux v6.11-rc1.
            #
            echo "$CONFTEST_PREAMBLE
            #include <linux/platform_device.h>
            int conftest_platform_driver_struct_remove_returns_void(struct platform_device *pdev,
                                                                    struct platform_driver *driver) {
                return driver->remove(pdev);
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c

            if [ -f conftest$$.o ]; then
                rm -f conftest$$.o

                echo "#undef NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
            else
                echo "#define NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
            fi
        ;;
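
The generated type macro then selects the matching callback signature when declaring the driver. A hedged sketch of that pattern; the nv_plat_* names are invented for illustration:

/* Assumes <linux/platform_device.h> is included. */
#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)
static void nv_plat_remove(struct platform_device *pdev)
{
    /* teardown */
}
#else
static int nv_plat_remove(struct platform_device *pdev)
{
    /* teardown */
    return 0;
}
#endif

static struct platform_driver nv_plat_driver = {
    .remove = nv_plat_remove,
    .driver = { .name = "nv-plat-sketch" },
};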

        module_import_ns_takes_constant)
            #
            # Determine if the MODULE_IMPORT_NS macro takes a string literal
@@ -7540,6 +7640,62 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
        ;;

        assign_str)
            #
            # Determine whether the __assign_str() macro, used in tracepoint
            # event definitions, has the 'src' parameter.
            #
            # The 'src' parameter was removed by commit 2c92ca849fcc
            # ("tracing/treewide: Remove second parameter of __assign_str()") in
            # v6.10.
            #
            # The expected usage of __assign_str() inside the TRACE_EVENT()
            # macro, which involves multiple include passes and assumes it is
            # in a header file, requires a non-standard conftest approach of
            # producing both a header and a C file.
            #
            echo "$CONFTEST_PREAMBLE
            #undef TRACE_SYSTEM
            #define TRACE_SYSTEM conftest

            #if !defined(_TRACE_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
            #define _TRACE_CONFTEST_H
            #include <linux/tracepoint.h>
            TRACE_EVENT(conftest,
                TP_PROTO(const char *s),
                TP_ARGS(s),
                TP_STRUCT__entry(__string(s, s)),
                TP_fast_assign(__assign_str(s);),
                TP_printk(\"%s\", __get_str(s))
            );
            #endif

            #undef TRACE_INCLUDE_PATH
            #define TRACE_INCLUDE_PATH .
            #define TRACE_INCLUDE_FILE conftest$$
            #include <trace/define_trace.h>
            " > conftest$$.h

            echo "$CONFTEST_PREAMBLE
            #define CREATE_TRACE_POINTS
            #include \"conftest$$.h\"

            void conftest_assign_str(void) {
                trace_conftest(\"conftest\");
            }
            " > conftest$$.c

            $CC $CFLAGS -c conftest$$.c >/dev/null 2>&1
            rm -f conftest$$.c conftest$$.h

            if [ -f conftest$$.o ]; then
                rm -f conftest$$.o

                echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 1" | append_conftest "functions"
            else
                echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 2" | append_conftest "functions"
            fi
        ;;
;;
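
A tracepoint definition can then stay compatible across kernels by wrapping the macro once on the generated argument count. A hedged sketch; the wrapper name is invented for illustration:

/* Select the right __assign_str() arity once, based on the probe result. */
#if NV_ASSIGN_STR_ARGUMENT_COUNT == 1
    #define NV_ASSIGN_STR(dst, src) __assign_str(dst)
#else
    #define NV_ASSIGN_STR(dst, src) __assign_str(dst, src)
#endif

/* ...then inside TRACE_EVENT(...): TP_fast_assign(NV_ASSIGN_STR(name, name);) */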

        drm_driver_has_date)
            #
@@ -7565,6 +7721,33 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
        ;;

        drm_connector_helper_funcs_mode_valid_has_const_mode_arg)
            #
            # Determine if the 'mode' pointer argument is const in
            # drm_connector_helper_funcs::mode_valid.
            #
            # The 'mode' pointer argument in
            # drm_connector_helper_funcs::mode_valid was made const by commit
            # 26d6fd81916e ("drm/connector: make mode_valid take a const struct
            # drm_display_mode") in linux-next, expected in v6.15.
            #
            CODE="
            #if defined(NV_DRM_DRM_ATOMIC_HELPER_H_PRESENT)
            #include <drm/drm_atomic_helper.h>
            #endif

            static int conftest_drm_connector_mode_valid(struct drm_connector *connector,
                                                         const struct drm_display_mode *mode) {
                return 0;
            }

            const struct drm_connector_helper_funcs conftest_drm_connector_helper_funcs = {
                .mode_valid = conftest_drm_connector_mode_valid,
            };"

            compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
        ;;

        # When adding a new conftest entry, please use the correct format for
        # specifying the relevant upstream Linux kernel commit. Please
        # avoid specifying -rc kernels, and only use SHAs that actually exist

@@ -14,6 +14,7 @@ NV_HEADER_PRESENCE_TESTS = \
    drm/drm_encoder.h \
    drm/drm_atomic_uapi.h \
    drm/drm_drv.h \
    drm/drm_edid.h \
    drm/drm_fbdev_generic.h \
    drm/drm_fbdev_ttm.h \
    drm/drm_client_setup.h \
@@ -65,13 +66,10 @@ NV_HEADER_PRESENCE_TESTS = \
    linux/nvhost.h \
    linux/nvhost_t194.h \
    linux/host1x-next.h \
    asm/book3s/64/hash-64k.h \
    asm/set_memory.h \
    asm/prom.h \
    asm/powernv.h \
    linux/atomic.h \
    asm/barrier.h \
    asm/opal-api.h \
    sound/hdaudio.h \
    asm/pgtable_types.h \
    asm/page.h \

@@ -62,6 +62,20 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && \
    defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && \
    defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif
#endif

#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
    (defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
     defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))

@@ -314,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
}

static int nv_drm_connector_mode_valid(struct drm_connector *connector,
#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
                                       const struct drm_display_mode *mode)
#else
                                       struct drm_display_mode *mode)
#endif
{
    struct drm_device *dev = connector->dev;
    struct nv_drm_device *nv_dev = to_nv_device(dev);

@@ -372,23 +372,88 @@ cursor_plane_req_config_update(struct drm_plane *plane,
        old_config.dstY != req_config->dstY;
}

static void free_drm_lut_surface(struct kref *ref)
static void release_drm_nvkms_surface(struct nv_drm_nvkms_surface *drm_nvkms_surface)
{
    struct nv_drm_lut_surface *drm_lut_surface =
        container_of(ref, struct nv_drm_lut_surface, refcount);
    struct NvKmsKapiDevice *pDevice = drm_lut_surface->pDevice;
    struct NvKmsKapiDevice *pDevice = drm_nvkms_surface->pDevice;

    BUG_ON(drm_lut_surface->nvkms_surface == NULL);
    BUG_ON(drm_lut_surface->nvkms_memory == NULL);
    BUG_ON(drm_lut_surface->buffer == NULL);
    BUG_ON(drm_nvkms_surface->nvkms_surface == NULL);
    BUG_ON(drm_nvkms_surface->nvkms_memory == NULL);
    BUG_ON(drm_nvkms_surface->buffer == NULL);

    nvKms->destroySurface(pDevice, drm_lut_surface->nvkms_surface);
    nvKms->unmapMemory(pDevice, drm_lut_surface->nvkms_memory,
    nvKms->destroySurface(pDevice, drm_nvkms_surface->nvkms_surface);
    nvKms->unmapMemory(pDevice, drm_nvkms_surface->nvkms_memory,
                       NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                       drm_lut_surface->buffer);
    nvKms->freeMemory(pDevice, drm_lut_surface->nvkms_memory);
                       drm_nvkms_surface->buffer);
    nvKms->freeMemory(pDevice, drm_nvkms_surface->nvkms_memory);
}

    nv_drm_free(drm_lut_surface);
static int init_drm_nvkms_surface(struct nv_drm_device *nv_dev,
                                  struct nv_drm_nvkms_surface *drm_nvkms_surface,
                                  struct nv_drm_nvkms_surface_params *surface_params)
{
    struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
    NvU8 compressible = 0; // No compression

    struct NvKmsKapiCreateSurfaceParams params = {};
    struct NvKmsKapiMemory *surface_mem;
    struct NvKmsKapiSurface *surface;
    void *buffer;

    params.format = surface_params->format;
    params.width = surface_params->width;
    params.height = surface_params->height;

    /* Allocate displayable memory. */
    if (nv_dev->hasVideoMemory) {
        surface_mem =
            nvKms->allocateVideoMemory(pDevice,
                                       NvKmsSurfaceMemoryLayoutPitch,
                                       NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                       surface_params->surface_size,
                                       &compressible);
    } else {
        surface_mem =
            nvKms->allocateSystemMemory(pDevice,
                                        NvKmsSurfaceMemoryLayoutPitch,
                                        NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                        surface_params->surface_size,
                                        &compressible);
    }
    if (surface_mem == NULL) {
        return -ENOMEM;
    }

    /* Map memory in order to populate it. */
    if (!nvKms->mapMemory(pDevice, surface_mem,
                          NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                          &buffer)) {
        nvKms->freeMemory(pDevice, surface_mem);
        return -ENOMEM;
    }

    params.planes[0].memory = surface_mem;
    params.planes[0].offset = 0;
    params.planes[0].pitch = surface_params->surface_size;

    /* Create surface. */
    surface = nvKms->createSurface(pDevice, &params);
    if (surface == NULL) {
        nvKms->unmapMemory(pDevice, surface_mem,
                           NVKMS_KAPI_MAPPING_TYPE_KERNEL, buffer);
        nvKms->freeMemory(pDevice, surface_mem);
        return -ENOMEM;
    }

    /* Pack into struct nv_drm_nvkms_surface. */
    drm_nvkms_surface->pDevice = pDevice;
    drm_nvkms_surface->nvkms_memory = surface_mem;
    drm_nvkms_surface->nvkms_surface = surface;
    drm_nvkms_surface->buffer = buffer;

    /* Init refcount. */
    kref_init(&drm_nvkms_surface->refcount);

    return 0;
}

static struct nv_drm_lut_surface *alloc_drm_lut_surface(
@@ -399,86 +464,49 @@ static struct nv_drm_lut_surface *alloc_drm_lut_surface(
    NvU32 num_vss_header_entries,
    NvU32 num_entries)
{
    struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
    struct nv_drm_lut_surface *drm_lut_surface;
    NvU8 compressible = 0; // No compression
    size_t size =
    const size_t surface_size =
        (((num_vss_header_entries + num_entries) *
          NVKMS_LUT_CAPS_LUT_ENTRY_SIZE) + 255) & ~255; // 256-byte aligned

    struct NvKmsKapiMemory *surface_mem;
    struct NvKmsKapiSurface *surface;
    struct NvKmsKapiCreateSurfaceParams params = {};
    NvU16 *lut_data;
    struct nv_drm_nvkms_surface_params params = {};

    /* Allocate displayable memory. */
    if (nv_dev->hasVideoMemory) {
        surface_mem =
            nvKms->allocateVideoMemory(pDevice,
                                       NvKmsSurfaceMemoryLayoutPitch,
                                       NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                       size,
                                       &compressible);
    } else {
        surface_mem =
            nvKms->allocateSystemMemory(pDevice,
                                        NvKmsSurfaceMemoryLayoutPitch,
                                        NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                        size,
                                        &compressible);
    }
    if (surface_mem == NULL) {
        return NULL;
    }

    /* Map memory in order to populate it. */
    if (!nvKms->mapMemory(pDevice, surface_mem,
                          NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                          (void **) &lut_data)) {
        nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }

    /* Create surface. */
    params.format = NvKmsSurfaceMemoryFormatR16G16B16A16;
    params.width = num_vss_header_entries + num_entries;
    params.height = 1;
    params.planes[0].memory = surface_mem;
    params.planes[0].offset = 0;
    params.planes[0].pitch = size;
    params.surface_size = surface_size;

    surface = nvKms->createSurface(pDevice, &params);
    if (surface == NULL) {
        nvKms->unmapMemory(pDevice, surface_mem,
                           NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
        nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }

    /* Pack into struct nv_drm_lut_surface. */
    drm_lut_surface = nv_drm_calloc(1, sizeof(struct nv_drm_lut_surface));
    if (drm_lut_surface == NULL) {
        nvKms->destroySurface(pDevice, surface);
        nvKms->unmapMemory(pDevice, surface_mem,
                           NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
        nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }
    drm_lut_surface->pDevice = pDevice;
    drm_lut_surface->nvkms_memory = surface_mem;
    drm_lut_surface->nvkms_surface = surface;
    drm_lut_surface->buffer = lut_data;

    if (init_drm_nvkms_surface(nv_dev, &drm_lut_surface->base, &params) != 0) {
        nv_drm_free(drm_lut_surface);
        return NULL;
    }

    drm_lut_surface->properties.vssSegments = num_vss_header_segments;
    drm_lut_surface->properties.vssType = vss_type;
    drm_lut_surface->properties.lutEntries = num_entries;
    drm_lut_surface->properties.entryFormat = entry_format;

    /* Init refcount. */
    kref_init(&drm_lut_surface->refcount);

    return drm_lut_surface;
}

static void free_drm_lut_surface(struct kref *ref)
{
    struct nv_drm_nvkms_surface *drm_nvkms_surface =
        container_of(ref, struct nv_drm_nvkms_surface, refcount);
    struct nv_drm_lut_surface *drm_lut_surface =
        container_of(drm_nvkms_surface, struct nv_drm_lut_surface, base);

    // Clean up base
    release_drm_nvkms_surface(drm_nvkms_surface);

    nv_drm_free(drm_lut_surface);
}

static NvU32 fp32_lut_interp(
    NvU16 entry0,
    NvU16 entry1,
@@ -582,7 +610,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_vss(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate VSS header. */
    if (vss_header_seg_sizes != NULL) {
@@ -733,7 +761,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_legacy(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Fill LUT surface. */
    for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -799,7 +827,7 @@ static struct nv_drm_lut_surface *create_drm_tmo_surface(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate linear VSS header. */
    for (entry_idx = 0; entry_idx < NUM_VSS_HEADER_ENTRIES; entry_idx++) {
@@ -901,7 +929,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_vss(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate VSS header. */
    if (vss_header_seg_sizes != NULL) {
@@ -1021,7 +1049,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_legacy(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Fill LUT surface. */
    for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -1057,6 +1085,74 @@ update_matrix_override(struct drm_property_blob *blob,
    return enabled;
}

static enum NvKmsInputColorSpace nv_get_nvkms_input_colorspace(
    enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
        case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return NVKMS_INPUT_COLOR_SPACE_NONE;
        case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return NVKMS_INPUT_COLOR_SPACE_BT709;
        case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return NVKMS_INPUT_COLOR_SPACE_BT2100;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return NVKMS_INPUT_COLOR_SPACE_NONE;
    }
}

static enum NvKmsInputTf nv_get_nvkms_input_tf(
    enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
        case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return NVKMS_INPUT_TF_LINEAR;
        case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return NVKMS_INPUT_TF_LINEAR;
        case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return NVKMS_INPUT_TF_PQ;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return NVKMS_INPUT_TF_LINEAR;
    }
}

#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
static enum NvKmsInputColorSpace nv_drm_color_encoding_to_nvkms_colorspace(
    enum drm_color_encoding color_encoding)
{
    switch(color_encoding) {
        case DRM_COLOR_YCBCR_BT601:
            return NVKMS_INPUT_COLOR_SPACE_BT601;
        case DRM_COLOR_YCBCR_BT709:
            return NVKMS_INPUT_COLOR_SPACE_BT709;
        case DRM_COLOR_YCBCR_BT2020:
            return NVKMS_INPUT_COLOR_SPACE_BT2020;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported DRM color_encoding");
            return NVKMS_INPUT_COLOR_SPACE_NONE;
    }
}

static enum NvKmsInputColorRange nv_drm_color_range_to_nvkms_color_range(
    enum drm_color_range color_range)
{
    switch(color_range) {
        case DRM_COLOR_YCBCR_FULL_RANGE:
            return NVKMS_INPUT_COLOR_RANGE_FULL;
        case DRM_COLOR_YCBCR_LIMITED_RANGE:
            return NVKMS_INPUT_COLOR_RANGE_LIMITED;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported DRM color_range");
            return NVKMS_INPUT_COLOR_RANGE_DEFAULT;
    }
}
#endif

static int
plane_req_config_update(struct drm_plane *plane,
                        struct drm_plane_state *plane_state,
@@ -1190,8 +1286,37 @@ plane_req_config_update(struct drm_plane *plane,
        nv_plane->defaultCompositionMode;
#endif

    req_config->config.inputColorSpace =
        nv_drm_plane_state->input_colorspace;
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
    if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
        nv_drm_format_is_yuv(plane_state->fb->format->format)) {

        if (nv_plane->supportsColorProperties) {
            req_config->config.inputColorSpace =
                nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
            req_config->config.inputColorRange =
                nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
        } else {
            req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
            req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
        }
        req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
    } else {
#endif
        req_config->config.inputColorSpace =
            nv_get_nvkms_input_colorspace(nv_drm_plane_state->input_colorspace);
        req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
        req_config->config.inputTf =
            nv_get_nvkms_input_tf(nv_drm_plane_state->input_colorspace);
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
    }
#endif

    req_config->flags.inputTfChanged =
        (old_config.inputTf != req_config->config.inputTf);
    req_config->flags.inputColorSpaceChanged =
        (old_config.inputColorSpace != req_config->config.inputColorSpace);
    req_config->flags.inputColorRangeChanged =
        (old_config.inputColorRange != req_config->config.inputColorRange);

    req_config->config.syncParams.preSyncptSpecified = false;
    req_config->config.syncParams.postSyncptRequested = false;
@@ -1240,10 +1365,10 @@ plane_req_config_update(struct drm_plane *plane,

            switch (info_frame->eotf) {
                case HDMI_EOTF_SMPTE_ST2084:
                    req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
                    req_config->config.outputTf = NVKMS_OUTPUT_TF_PQ;
                    break;
                case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
                    req_config->config.tf =
                    req_config->config.outputTf =
                        NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
                    break;
                default:
@@ -1254,7 +1379,7 @@ plane_req_config_update(struct drm_plane *plane,
        req_config->config.hdrMetadata.enabled = true;
    } else {
        req_config->config.hdrMetadata.enabled = false;
        req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
        req_config->config.outputTf = NVKMS_OUTPUT_TF_NONE;
    }

    req_config->flags.hdrMetadataChanged =
@@ -1264,7 +1389,7 @@ plane_req_config_update(struct drm_plane *plane,
                &req_config->config.hdrMetadata.val,
                sizeof(struct NvKmsHDRStaticMetadata)));

    req_config->flags.tfChanged = (old_config.tf != req_config->config.tf);
    req_config->flags.outputTfChanged = (old_config.outputTf != req_config->config.outputTf);
#endif

    req_config->config.matrixOverrides.enabled.lmsCtm =
@@ -1295,7 +1420,7 @@ plane_req_config_update(struct drm_plane *plane,

    if (nv_drm_plane_state->degamma_changed) {
        if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
            kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
            kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_drm_plane_state->degamma_drm_lut_surface = NULL;
        }
@@ -1327,7 +1452,7 @@ plane_req_config_update(struct drm_plane *plane,
        if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
            req_config->config.ilut.enabled = NV_TRUE;
            req_config->config.ilut.lutSurface =
                nv_drm_plane_state->degamma_drm_lut_surface->nvkms_surface;
                nv_drm_plane_state->degamma_drm_lut_surface->base.nvkms_surface;
            req_config->config.ilut.offset = 0;
            req_config->config.ilut.vssSegments =
                nv_drm_plane_state->degamma_drm_lut_surface->properties.vssSegments;
@@ -1346,7 +1471,7 @@ plane_req_config_update(struct drm_plane *plane,

    if (nv_drm_plane_state->tmo_changed) {
        if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
            kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
            kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_drm_plane_state->tmo_drm_lut_surface = NULL;
        }
@@ -1363,7 +1488,7 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
|
||||
req_config->config.tmo.enabled = NV_TRUE;
|
||||
req_config->config.tmo.lutSurface =
|
||||
nv_drm_plane_state->tmo_drm_lut_surface->nvkms_surface;
|
||||
nv_drm_plane_state->tmo_drm_lut_surface->base.nvkms_surface;
|
||||
req_config->config.tmo.offset = 0;
|
||||
req_config->config.tmo.vssSegments =
|
||||
nv_drm_plane_state->tmo_drm_lut_surface->properties.vssSegments;
|
||||
@@ -1870,7 +1995,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
nv_plane_state->degamma_drm_lut_surface =
|
||||
nv_old_plane_state->degamma_drm_lut_surface;
|
||||
if (nv_plane_state->degamma_drm_lut_surface) {
|
||||
kref_get(&nv_plane_state->degamma_drm_lut_surface->refcount);
|
||||
kref_get(&nv_plane_state->degamma_drm_lut_surface->base.refcount);
|
||||
}
|
||||
|
||||
nv_plane_state->tmo_lut = nv_old_plane_state->tmo_lut;
|
||||
@@ -1881,7 +2006,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
nv_plane_state->tmo_drm_lut_surface =
|
||||
nv_old_plane_state->tmo_drm_lut_surface;
|
||||
if (nv_plane_state->tmo_drm_lut_surface) {
|
||||
kref_get(&nv_plane_state->tmo_drm_lut_surface->refcount);
|
||||
kref_get(&nv_plane_state->tmo_drm_lut_surface->base.refcount);
|
||||
}
|
||||
|
||||
return &nv_plane_state->base;
|
||||
@@ -1909,13 +2034,13 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
|
||||
nv_drm_property_blob_put(nv_drm_plane_state->degamma_lut);
|
||||
if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
|
||||
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
}
|
||||
|
||||
nv_drm_property_blob_put(nv_drm_plane_state->tmo_lut);
|
||||
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
|
||||
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
}
|
||||
}
|
||||
@@ -2113,7 +2238,7 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
|
||||
}
|
||||
nv_state->regamma_divisor = nv_old_state->regamma_divisor;
|
||||
if (nv_state->regamma_drm_lut_surface) {
|
||||
kref_get(&nv_state->regamma_drm_lut_surface->refcount);
|
||||
kref_get(&nv_state->regamma_drm_lut_surface->base.refcount);
|
||||
}
|
||||
nv_state->regamma_changed = false;
|
||||
|
||||
@@ -2142,7 +2267,7 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
|
||||
|
||||
nv_drm_property_blob_put(nv_state->regamma_lut);
|
||||
if (nv_state->regamma_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_state->regamma_drm_lut_surface->refcount,
|
||||
kref_put(&nv_state->regamma_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
}
|
||||
|
||||
@@ -2386,7 +2511,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
|
||||
if (nv_crtc_state->regamma_changed) {
|
||||
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_crtc_state->regamma_drm_lut_surface->refcount,
|
||||
kref_put(&nv_crtc_state->regamma_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
nv_crtc_state->regamma_drm_lut_surface = NULL;
|
||||
}
|
||||
@@ -2417,7 +2542,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
|
||||
req_config->modeSetConfig.olut.enabled = NV_TRUE;
|
||||
req_config->modeSetConfig.olut.lutSurface =
|
||||
nv_crtc_state->regamma_drm_lut_surface->nvkms_surface;
|
||||
nv_crtc_state->regamma_drm_lut_surface->base.nvkms_surface;
|
||||
req_config->modeSetConfig.olut.offset = 0;
|
||||
req_config->modeSetConfig.olut.vssSegments =
|
||||
nv_crtc_state->regamma_drm_lut_surface->properties.vssSegments;
|
||||
@@ -2521,7 +2646,7 @@ static void nv_drm_plane_install_properties(
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
NV_DRM_INPUT_COLOR_SPACE_NONE);
|
||||
}
|
||||
|
||||
if (supportsICtCp) {
|
||||
@@ -2531,17 +2656,14 @@ static void nv_drm_plane_install_properties(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The old DRM_OBJECT_MAX_PROPERTY limit of 24 is too small to
|
||||
* accomodate all of the properties for the ICtCp pipeline.
|
||||
*
|
||||
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
|
||||
* accommodate new color props") in Linux v6.8 increased the limit to
|
||||
* 64. To be safe, require this before attaching any properties for the
|
||||
* ICtCp pipeline.
|
||||
*/
|
||||
if (DRM_OBJECT_MAX_PROPERTY >= 64) {
|
||||
/*
|
||||
* Per-plane HDR properties get us dangerously close to the 24 property
|
||||
* limit on kernels that don't support NV_DRM_USE_EXTENDED_PROPERTIES.
|
||||
*/
|
||||
if (NV_DRM_USE_EXTENDED_PROPERTIES) {
|
||||
if (supportsICtCp) {
|
||||
if (nv_dev->nv_plane_lms_ctm_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_lms_ctm_property, 0);
|
||||
@@ -2568,36 +2690,36 @@ static void nv_drm_plane_install_properties(
|
||||
NVKMS_LUT_ARRAY_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nv_dev->nv_plane_blend_ctm_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
|
||||
}
|
||||
if (nv_dev->nv_plane_blend_ctm_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
|
||||
}
|
||||
|
||||
if (nv_plane->ilut_caps.supported) {
|
||||
if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
|
||||
if (nv_dev->nv_plane_degamma_tf_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_tf_property,
|
||||
NV_DRM_TRANSFER_FUNCTION_DEFAULT);
|
||||
if (nv_plane->ilut_caps.supported) {
|
||||
if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
|
||||
if (nv_dev->nv_plane_degamma_tf_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_tf_property,
|
||||
NV_DRM_TRANSFER_FUNCTION_DEFAULT);
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_multiplier_property) {
|
||||
/* Default to 1 in S31.32 Sign-Magnitude Format */
|
||||
nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_multiplier_property,
|
||||
nv_plane_state->degamma_multiplier);
|
||||
}
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_multiplier_property) {
|
||||
/* Default to 1 in S31.32 Sign-Magnitude Format */
|
||||
nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
|
||||
if (nv_dev->nv_plane_degamma_lut_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_multiplier_property,
|
||||
nv_plane_state->degamma_multiplier);
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_lut_size_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_size_property,
|
||||
NVKMS_LUT_ARRAY_SIZE);
|
||||
}
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_lut_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_lut_size_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_size_property,
|
||||
NVKMS_LUT_ARRAY_SIZE);
|
||||
}
|
||||
}
|
||||
}
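
On the S31.32 default above: in sign-magnitude fixed point the sign lives in bit 63, the integer part in bits 62:32, and the fraction in bits 31:0, so 1.0 is exactly ((uint64_t)1) << 32. A hedged, userspace-style illustration of that encoding (the helper is not driver code; kernel code would avoid floating point):

#include <stdint.h>

/* Illustrative S31.32 sign-magnitude encoder, assuming the layout
 * described above (sign in bit 63, 2^32 scaling for the fraction). */
static inline uint64_t s31_32_from_double(double v)
{
    uint64_t sign = (v < 0.0) ? (UINT64_C(1) << 63) : 0;
    double mag = (v < 0.0) ? -v : v;
    return sign | (uint64_t)(mag * 4294967296.0); /* mag * 2^32 */
}

/* s31_32_from_double(1.0) == ((uint64_t)1) << 32, the default used above. */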
@@ -2776,6 +2898,29 @@ nv_drm_plane_create(struct drm_device *dev,
goto failed_plane_init;
}

#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if (pResInfo->caps.supportsInputColorSpace &&
pResInfo->caps.supportsInputColorRange) {

nv_plane->supportsColorProperties = true;

drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
} else {
nv_plane->supportsColorProperties = false;
}
#else
nv_plane->supportsColorProperties = false;
#endif

drm_plane_helper_add(plane, &nv_plane_helper_funcs);

if (plane_type != DRM_PLANE_TYPE_CURSOR) {

@@ -191,6 +191,13 @@ struct nv_drm_plane {
*/
uint32_t layer_idx;

/**
* @supportsColorProperties
*
* If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
*/
bool supportsColorProperties;

struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};
@@ -203,10 +210,23 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
return container_of(plane, struct nv_drm_plane, base);
}

struct nv_drm_lut_surface {
struct nv_drm_nvkms_surface {
struct NvKmsKapiDevice *pDevice;
struct NvKmsKapiMemory *nvkms_memory;
struct NvKmsKapiSurface *nvkms_surface;
void *buffer;
struct kref refcount;
};

struct nv_drm_nvkms_surface_params {
NvU32 width;
NvU32 height;
size_t surface_size;
enum NvKmsSurfaceMemoryFormat format;
};

struct nv_drm_lut_surface {
struct nv_drm_nvkms_surface base;
struct {
NvU32 vssSegments;
enum NvKmsLUTVssType vssType;
@@ -215,14 +235,12 @@ struct nv_drm_lut_surface {
enum NvKmsLUTFormat entryFormat;

} properties;
void *buffer;
struct kref refcount;
};

struct nv_drm_plane_state {
struct drm_plane_state base;
s32 __user *fd_user_ptr;
enum NvKmsInputColorSpace input_colorspace;
enum nv_drm_input_color_space input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property_blob *hdr_output_metadata;
#endif

@@ -35,6 +35,8 @@
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-utils.h"
#include "nv_dpy_id.h"

#if defined(NV_DRM_AVAILABLE)

@@ -90,6 +92,7 @@

#include <linux/pci.h>
#include <linux/workqueue.h>
#include <linux/sort.h>

/*
* Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h")
@@ -120,15 +123,15 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);

static struct nv_drm_device *dev_list = NULL;

static char* nv_get_input_colorspace_name(
enum NvKmsInputColorSpace colorSpace)
static const char* nv_get_input_colorspace_name(
enum nv_drm_input_color_space colorSpace)
{
switch (colorSpace) {
case NVKMS_INPUT_COLORSPACE_NONE:
case NV_DRM_INPUT_COLOR_SPACE_NONE:
return "None";
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
return "scRGB Linear FP16";
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
return "BT.2100 PQ";
default:
/* We shouldn't hit this */
@@ -284,6 +287,123 @@ done:
mutex_unlock(&nv_dev->lock);
}

struct nv_drm_mst_display_info {
NvKmsKapiDisplay handle;
NvBool isDpMST;
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH];
};

/*
* Helper function to get DP MST display info.
* dpMSTDisplayInfos is allocated dynamically,
* so it needs to be freed after finishing the query.
*/
static int nv_drm_get_mst_display_infos
(
struct nv_drm_device *nv_dev,
NvKmsKapiDisplay hDisplay,
struct nv_drm_mst_display_info **dpMSTDisplayInfos,
NvU32 *nDynamicDisplays
)
{
struct NvKmsKapiStaticDisplayInfo *displayInfo = NULL;
struct NvKmsKapiStaticDisplayInfo *dynamicDisplayInfo = NULL;
struct NvKmsKapiConnectorInfo *connectorInfo = NULL;
struct nv_drm_mst_display_info *displayInfos = NULL;
NvU32 i = 0;
int ret = 0;
NVDpyId dpyId;
*nDynamicDisplays = 0;

/* Query NvKmsKapiStaticDisplayInfo and NvKmsKapiConnectorInfo */

if ((displayInfo = nv_drm_calloc(1, sizeof(*displayInfo))) == NULL) {
ret = -ENOMEM;
goto done;
}

if ((dynamicDisplayInfo = nv_drm_calloc(1, sizeof(*dynamicDisplayInfo))) == NULL) {
ret = -ENOMEM;
goto done;
}

if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice, hDisplay, displayInfo)) {
ret = -EINVAL;
goto done;
}

connectorInfo = nvkms_get_connector_info(nv_dev->pDevice,
displayInfo->connectorHandle);

if (IS_ERR(connectorInfo)) {
ret = PTR_ERR(connectorInfo);
goto done;
}

*nDynamicDisplays = nvCountDpyIdsInDpyIdList(connectorInfo->dynamicDpyIdList);

if (*nDynamicDisplays == 0) {
goto done;
}

if ((displayInfos = nv_drm_calloc(*nDynamicDisplays, sizeof(*displayInfos))) == NULL) {
ret = -ENOMEM;
goto done;
}

FOR_ALL_DPY_IDS(dpyId, connectorInfo->dynamicDpyIdList) {
if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice,
nvDpyIdToNvU32(dpyId),
dynamicDisplayInfo)) {
ret = -EINVAL;
nv_drm_free(displayInfos);
goto done;
}

displayInfos[i].handle = dynamicDisplayInfo->handle;
displayInfos[i].isDpMST = dynamicDisplayInfo->isDpMST;
memcpy(displayInfos[i].dpAddress, dynamicDisplayInfo->dpAddress, sizeof(dynamicDisplayInfo->dpAddress));

i++;
}

*dpMSTDisplayInfos = displayInfos;

done:

nv_drm_free(displayInfo);

nv_drm_free(dynamicDisplayInfo);

nv_drm_free(connectorInfo);

return ret;
}

static int nv_drm_disp_cmp (const void *l, const void *r)
{
struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;

return strcmp(l_info->dpAddress, r_info->dpAddress);
}

/*
* Helper function to sort displays by dpAddress, compared as strings.
* Sorting makes the order in which DRM connector IDs are created
* deterministic; note the ordering is lexicographic, not numeric.
*/
static void nv_drm_sort_dynamic_displays_by_dp_addr
(
struct nv_drm_mst_display_info *infos,
int nDynamicDisplays
)
{
sort(infos, nDynamicDisplays, sizeof(*infos), nv_drm_disp_cmp, NULL);
}
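
Because the comparator uses strcmp(), the resulting order is byte-wise rather than numeric. A hedged illustration with made-up DP addresses (the address format here is hypothetical):

/* Illustrative only: three hypothetical dpAddress strings sorted as above. */
static void nv_drm_dp_addr_sort_example(void)
{
    struct nv_drm_mst_display_info demo[] = {
        { .dpAddress = "1.2" }, { .dpAddress = "1.10" }, { .dpAddress = "1.9" },
    };

    nv_drm_sort_dynamic_displays_by_dp_addr(demo, 3);
    /* Resulting strcmp() order: "1.10", "1.2", "1.9" -- deterministic
     * across probes, though "1.10" sorts before "1.2" even though 10 > 2. */
}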

/*
* Helper function to initialize drm_device::mode_config from
* NvKmsKapiDevice's resource information.
@@ -365,9 +485,11 @@ static void nv_drm_enumerate_encoders_and_connectors
nv_dev,
"Failed to enumerate NvKmsKapiDisplay handles");
} else {
NvU32 i;
NvU32 i, j;
NvU32 nDynamicDisplays = 0;

for (i = 0; i < nDisplays; i++) {
struct nv_drm_mst_display_info *displayInfos = NULL;
struct drm_encoder *encoder =
nv_drm_add_encoder(dev, hDisplays[i]);

@@ -377,6 +499,34 @@ static void nv_drm_enumerate_encoders_and_connectors
"Failed to add connector for NvKmsKapiDisplay 0x%08x",
hDisplays[i]);
}

if (nv_drm_get_mst_display_infos(nv_dev, hDisplays[i],
&displayInfos, &nDynamicDisplays)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to get dynamic displays");
} else if (nDynamicDisplays) {
nv_drm_sort_dynamic_displays_by_dp_addr(displayInfos, nDynamicDisplays);

for (j = 0; j < nDynamicDisplays; j++) {
if (displayInfos[j].isDpMST) {
struct drm_encoder *mst_encoder =
nv_drm_add_encoder(dev, displayInfos[j].handle);

NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "found DP MST port display handle %u",
displayInfos[j].handle);

if (IS_ERR(mst_encoder)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to add connector for NvKmsKapiDisplay 0x%08x",
displayInfos[j].handle);
}
}
}

nv_drm_free(displayInfos);
}
}
}

@@ -602,6 +752,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
memset(&allocateDeviceParams, 0, sizeof(allocateDeviceParams));

allocateDeviceParams.gpuId = nv_dev->gpu_info.gpu_id;
allocateDeviceParams.migDevice = nv_dev->gpu_mig_device;

allocateDeviceParams.privateData = nv_dev;
allocateDeviceParams.eventCallback = nv_drm_event_callback;
@@ -672,6 +823,9 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)

nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;

nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;

#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
gen = nv_dev->pageKindGeneration;
kind = nv_dev->genericPageKind;
@@ -855,6 +1009,62 @@ static void nv_drm_master_set(struct drm_device *dev,
}
#endif

static
int nv_drm_reset_input_colorspace(struct drm_device *dev)
{
struct drm_atomic_state *state;
struct drm_plane_state *plane_state;
struct drm_plane *plane;
struct nv_drm_plane_state *nv_drm_plane_state;
struct drm_modeset_acquire_ctx ctx;
int ret = 0;
bool do_reset = false;
NvU32 flags = 0;

state = drm_atomic_state_alloc(dev);
if (!state)
return -ENOMEM;

#if defined(DRM_MODESET_ACQUIRE_INTERRUPTIBLE)
flags |= DRM_MODESET_ACQUIRE_INTERRUPTIBLE;
#endif
drm_modeset_acquire_init(&ctx, flags);
state->acquire_ctx = &ctx;

nv_drm_for_each_plane(plane, dev) {
plane_state = drm_atomic_get_plane_state(state, plane);
if (IS_ERR(plane_state)) {
ret = PTR_ERR(plane_state);
goto out;
}

nv_drm_plane_state = to_nv_drm_plane_state(plane_state);
if (nv_drm_plane_state) {
if (nv_drm_plane_state->input_colorspace != NV_DRM_INPUT_COLOR_SPACE_NONE) {
nv_drm_plane_state->input_colorspace = NV_DRM_INPUT_COLOR_SPACE_NONE;
do_reset = true;
}
}
}

if (do_reset) {
ret = drm_atomic_commit(state);
}

out:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
drm_atomic_state_put(state);
#else
// In case of success, drm_atomic_commit() takes care of cleaning up and freeing the state.
if (ret != 0) {
drm_atomic_state_free(state);
}
#endif
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);

return ret;
}

#if defined(NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG)
static
@@ -898,6 +1108,12 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
drm_modeset_unlock_all(dev);

nvKms->releaseOwnership(nv_dev->pDevice);
} else {
int err = nv_drm_reset_input_colorspace(dev);
if (err != 0) {
NV_DRM_DEV_LOG_WARN(nv_dev,
"nv_drm_reset_input_colorspace failed with error code: %d!", err);
}
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
@@ -935,6 +1151,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
}

params->gpu_id = nv_dev->gpu_info.gpu_id;
params->mig_device = nv_dev->gpu_mig_device;
params->primary_index = dev->primary->index;
params->supports_alloc = false;
params->generic_page_kind = 0;
@@ -1725,7 +1942,7 @@ static const struct file_operations nv_drm_fops = {

.llseek = noop_llseek,

#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
#if defined(FOP_UNSIGNED_OFFSET)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
@@ -1967,16 +2184,16 @@ void nv_drm_update_drm_driver_features(void)
/*
* Helper function to allocate/register a DRM device for the given NVIDIA GPU ID.
*/
void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
{
struct nv_drm_device *nv_dev = NULL;
struct drm_device *dev = NULL;
struct device *device = gpu_info->os_device_ptr;
struct device *device = gpu_info->gpuInfo.os_device_ptr;
bool bus_is_pci;

DRM_DEBUG(
"Registering device for NVIDIA GPU ID 0x%08x",
gpu_info->gpu_id);
gpu_info->gpuInfo.gpu_id);

/* Allocate NVIDIA-DRM device */

@@ -1988,7 +2205,8 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
return;
}

nv_dev->gpu_info = *gpu_info;
nv_dev->gpu_info = gpu_info->gpuInfo;
nv_dev->gpu_mig_device = gpu_info->migDevice;

#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
mutex_init(&nv_dev->lock);
@@ -2045,9 +2263,30 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
#endif
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
} else {
resource_size_t base = (resource_size_t) nv_dev->vtFbBaseAddress;
resource_size_t size = (resource_size_t) nv_dev->vtFbSize;

if (base > 0 && size > 0) {
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT)

#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG)
drm_aperture_remove_conflicting_framebuffers(base, size, false, &nv_drm_driver);
#elif defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG)
drm_aperture_remove_conflicting_framebuffers(base, size, &nv_drm_driver);
#else
drm_aperture_remove_conflicting_framebuffers(base, size, false, nv_drm_driver.name);
#endif

#elif defined(NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT)
aperture_remove_conflicting_devices(base, size, nv_drm_driver.name);
#endif
} else {
NV_DRM_DEV_LOG_INFO(nv_dev, "Invalid framebuffer console info");
}
}
#if defined(NV_DRM_CLIENT_AVAILABLE)
drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
@@ -2078,7 +2317,7 @@ failed_drm_alloc:

#if defined(NV_LINUX)
int nv_drm_probe_devices(void)
{
nv_gpu_info_t *gpu_info = NULL;
struct NvKmsKapiGpuInfo *gpu_info = NULL;
NvU32 gpu_count = 0;
NvU32 i;

@@ -27,13 +27,15 @@

#if defined(NV_DRM_AVAILABLE)

struct NvKmsKapiGpuInfo;

int nv_drm_probe_devices(void);

void nv_drm_remove_devices(void);

void nv_drm_suspend_resume(NvBool suspend);

void nv_drm_register_drm_device(const nv_gpu_info_t *);
void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *);

void nv_drm_update_drm_driver_features(void);

@@ -319,7 +319,7 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
nv_encoder = get_nv_encoder_from_nvkms_display(dev, hDisplay);

if (nv_encoder != NULL) {
NV_DRM_DEV_LOG_ERR(
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Encoder with NvKmsKapiDisplay 0x%08x already exists.",
hDisplay);

@@ -202,6 +202,43 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
params.explicit_layout = false;
}

/*
* XXX work around an invalid pitch assumption in DRM.
*
* The smallest pitch the display hardware allows is 256.
*
* If a DRM client allocates a 32x32 cursor surface through
* DRM_IOCTL_MODE_CREATE_DUMB, we'll correctly round the pitch to 256:
*
* pitch = round(32 (width) * 4 (Bpp), 256) = 256
*
* and then allocate an 8k surface:
*
* size = pitch * 32 (height) = 8192
*
* and report the rounded pitch and size back to the client through the
* struct drm_mode_create_dumb ioctl params.
*
* But when the DRM client passes that buffer object handle to
* DRM_IOCTL_MODE_CURSOR, the client has no way to specify the pitch. This
* path in drm:
*
* DRM_IOCTL_MODE_CURSOR
* drm_mode_cursor_ioctl()
* drm_mode_cursor_common()
* drm_mode_cursor_universal()
*
* will implicitly create a framebuffer from the buffer object, and compute
* the pitch as width (32) x 4 Bpp = 128 (without aligning to our minimum
* pitch).
*
* Intercept this case and force the pitch back to 256.
*/
if ((params.width == 32) &&
(params.height == 32) &&
(params.planes[0].pitch == 128)) {
params.planes[0].pitch = 256;
}
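
To make the arithmetic in the comment concrete, a sketch of both computations (round_up() is the standard kernel helper; the function itself is illustrative, not driver code):

/* Illustrative arithmetic only. */
static void nv_drm_cursor_pitch_example(void)
{
    u32 dumb_pitch = round_up(32 * 4, 256); /* CREATE_DUMB path: 128 -> 256 */
    u32 dumb_size  = dumb_pitch * 32;       /* 256 * 32 = 8192 bytes */
    u32 cursor_pitch = 32 * 4;              /* MODE_CURSOR path: 128, unaligned */

    /* cursor_pitch is the value the check above intercepts and forces
     * back to 256. */
    (void)dumb_size;
    (void)cursor_pitch;
}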

/* Create NvKmsKapiSurface */

nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, &params);

@@ -166,4 +166,37 @@ uint32_t *nv_drm_format_array_alloc(
return array;
}

bool nv_drm_format_is_yuv(u32 format)
{
#if defined(NV_DRM_FORMAT_INFO_HAS_IS_YUV)
const struct drm_format_info *format_info = drm_format_info(format);
return (format_info != NULL) && format_info->is_yuv;
#else
switch (format) {
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:

case DRM_FORMAT_NV24:
case DRM_FORMAT_NV42:
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV61:
case DRM_FORMAT_NV12:
case DRM_FORMAT_NV21:

#if defined(DRM_FORMAT_P210)
case DRM_FORMAT_P210:
#endif
#if defined(DRM_FORMAT_P010)
case DRM_FORMAT_P010:
#endif
#if defined(DRM_FORMAT_P012)
case DRM_FORMAT_P012:
#endif
return true;
default:
return false;
}
#endif
}
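
A brief hedged sanity check of the two branches' intent: for the formats handled above, both the drm_format_info() path and the fallback switch should classify identically, e.g.:

/* Illustrative expectations, valid for either branch above (the
 * selftest function itself is hypothetical). */
static void nv_drm_format_is_yuv_selftest(void)
{
    WARN_ON(!nv_drm_format_is_yuv(DRM_FORMAT_NV12));     /* semi-planar YUV */
    WARN_ON(nv_drm_format_is_yuv(DRM_FORMAT_XRGB8888));  /* packed RGB */
}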

#endif

@@ -38,6 +38,8 @@ uint32_t *nv_drm_format_array_alloc(
unsigned int *count,
const long unsigned int nvkms_format_mask);

bool nv_drm_format_is_yuv(u32 format);

#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

#endif /* __NVIDIA_DRM_FORMAT_H__ */

@@ -308,12 +308,12 @@ static int __nv_drm_nvkms_gem_obj_init(
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;

if (!nvKms->getMemoryPages(nv_dev->pDevice,
if (!nvKms->isVidmem(pMemory) &&
!nvKms->getMemoryPages(nv_dev->pDevice,
pMemory,
&pages,
&numPages) &&
!nvKms->isVidmem(pMemory)) {
/* GetMemoryPages may fail for vidmem allocations,
&numPages)) {
/* GetMemoryPages will fail for vidmem allocations,
* but it should not fail for sysmem allocations. */
NV_DRM_DEV_LOG_ERR(nv_dev,
"Failed to get memory pages for NvKmsKapiMemory 0x%p",

@@ -69,6 +69,13 @@

#endif //NV_DRM_ROTATION_AVAILABLE

/*
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
* accommodate new color props") in Linux v6.8 increased the per-object
* property limit from 24 to 64.
*/
#define NV_DRM_USE_EXTENDED_PROPERTIES (DRM_OBJECT_MAX_PROPERTY >= 64)

/*
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
* (2017-09-26) and drm_dev_unref() is removed by

@@ -182,6 +182,7 @@ struct drm_nvidia_gem_import_userspace_memory_params {

struct drm_nvidia_get_dev_info_params {
uint32_t gpu_id; /* OUT */
uint32_t mig_device; /* OUT */
uint32_t primary_index; /* OUT; the "card%d" value */

uint32_t supports_alloc; /* OUT */

@@ -677,6 +677,33 @@ int nv_drm_atomic_commit(struct drm_device *dev,
"Flip event timeout on head %u", nv_crtc->head);
}
}

#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
/*
* If the legacy LUT needs to be updated, ensure that the previous LUT
* update is complete first.
*/
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
!nonblock /* waitForCompletion */);

/* If checking the LUT notifier failed, assume no LUT notifier is set. */
if (!complete) {
if (nonblock) {
return -EBUSY;
} else {
/*
* checkLutNotifier should wait on the notifier in this
* case, so we should only get here if the wait timed out.
*/
NV_DRM_DEV_LOG_ERR(
nv_dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
}
#endif
}

#if defined(NV_DRM_ATOMIC_HELPER_SWAP_STATE_HAS_STALL_ARG)
@@ -803,6 +830,19 @@ int nv_drm_atomic_commit(struct drm_device *dev,
__nv_drm_handle_flip_event(nv_crtc);
}
}

#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
true /* waitForCompletion */);
if (!complete) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
#endif
}
}

@@ -58,16 +58,6 @@ typedef struct nv_timer nv_drm_timer;
#error "Need to define kernel timer callback primitives for this OS"
#endif

#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif

struct page;

/* Set to true when the atomic modeset feature is enabled. */

@@ -85,8 +85,15 @@

DRM_DEBUG_DRIVER("[GPU ID 0x%08x] " __fmt, \
__dev->gpu_info.gpu_id, ##__VA_ARGS__)

enum nv_drm_input_color_space {
NV_DRM_INPUT_COLOR_SPACE_NONE,
NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR,
NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ
};

struct nv_drm_device {
nv_gpu_info_t gpu_info;
MIGDeviceId gpu_mig_device;

struct drm_device *dev;

@@ -182,6 +189,9 @@ struct nv_drm_device {
struct drm_property *nv_crtc_regamma_divisor_property;

struct nv_drm_device *next;

NvU64 vtFbBaseAddress;
NvU64 vtFbSize;
};

static inline NvU32 nv_drm_next_display_semaphore(

@@ -65,6 +65,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
@@ -74,6 +75,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
@@ -133,6 +135,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
@@ -140,8 +144,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_info_has_is_yuv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg

@@ -102,6 +102,11 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

MODULE_PARM_DESC(conceal_vrr_caps,
"Conceal all display VRR capabilities");
static bool conceal_vrr_caps = false;
module_param_named(conceal_vrr_caps, conceal_vrr_caps, bool, 0400);
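
As with the existing malloc_verbose parameter, this option would presumably be set at module load time (for example via an `options nvidia-modeset conceal_vrr_caps=1` line in a modprobe.d file); with mode 0400 the value is then readable by root under /sys/module/nvidia_modeset/parameters/ (path assumes the usual sysfs layout for nvidia-modeset.ko).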

/* Fail allocating the RM core channel for NVKMS using the i-th method (see
* FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
@@ -135,7 +140,12 @@ NvBool nvkms_test_fail_alloc_core_channel(

return NV_TRUE;
}

NvBool nvkms_conceal_vrr_caps(void)
{
return conceal_vrr_caps;
}

NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;

@@ -110,6 +110,7 @@ enum FailAllocCoreChannelMethod {
};

NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
NvBool nvkms_conceal_vrr_caps(void);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);

@@ -52,7 +52,7 @@ nvidia-modeset-y += $(NVIDIA_MODESET_BINARY_OBJECT_O)
# Define nvidia-modeset.ko-specific CFLAGS.
#

NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset
NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset -I$(src)/common/inc
NVIDIA_MODESET_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0

# Some Android kernels prohibit driver use of filesystem functions like

@@ -453,35 +453,19 @@ typedef struct nvidia_p2p_rsync_reg_info {

/*
* @brief
* Gets rsync (GEN-ID) register information associated with the supported
* NPUs.
*
* The caller would use the returned information {GPU device, NPU device,
* socket-id, cluster-id} to pick the optimal generation registers to issue
* RSYNC (NVLink HW flush).
*
* The interface allocates structures to return the information, hence
* nvidia_p2p_put_rsync_registers() must be called to free the structures.
*
* Note, cluster-id is hardcoded to zero as early system configurations would
* only support cluster mode i.e. all devices would share the same cluster-id
* (0). In the future, appropriate kernel support would be needed to query
* cluster-ids.
*
* @param[out] reg_info
* A pointer to the rsync reg info structure.
* This interface is no longer supported and will always return an error. It
* is left in place (for now) to allow third-party callers to build without
* any errors.
*
* @Returns
* 0 Upon successful completion. Otherwise, returns negative value.
* -ENODEV
*/
int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info);

/*
* @brief
* Frees the structures allocated by nvidia_p2p_get_rsync_registers().
*
* @param[in] reg_info
* A pointer to the rsync reg info structure.
* This interface is no longer supported. It is left in place (for now) to
* allow third-party callers to build without any errors.
*/
void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info);
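
Given the deprecation notes folded into the comments above, a hedged sketch of what a third-party caller can now expect (the caller function is illustrative, not from this tree):

/* Illustrative caller: the symbols still link, but per the updated
 * contract the query now always fails with -ENODEV. */
static int example_use_rsync_registers(void)
{
    nvidia_p2p_rsync_reg_info_t *info = NULL;
    int rc = nvidia_p2p_get_rsync_registers(&info);

    if (rc == 0) {
        /* Not expected to be reached on this driver series. */
        nvidia_p2p_put_rsync_registers(info);
    }
    return rc;
}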

@@ -1,51 +1,31 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef _clc86f_h_
#define _clc86f_h_

#ifdef __cplusplus
extern "C" {
#endif

#include "nvtypes.h"

/* class HOPPER_CHANNEL_GPFIFO */
/*
* Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Note there is no .mfs file for this class.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __gh100_clc86f_h__
#define __gh100_clc86f_h__

#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)

#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA

/* dma flow control data structure */
typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
@@ -64,54 +44,7 @@ typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored05[0x5c];
} Nvc86fControl, HopperAControlGPFifo;

/* fields and values */
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
#define NVC86F_SET_OBJECT (0x00000000)
#define NVC86F_SET_OBJECT_NVCLASS 15:0
#define NVC86F_SET_OBJECT_ENGINE 20:16
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC86F_ILLEGAL (0x00000004)
#define NVC86F_ILLEGAL_HANDLE 31:0
#define NVC86F_NOP (0x00000008)
#define NVC86F_NOP_HANDLE 31:0
#define NVC86F_SEMAPHOREA (0x00000010)
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC86F_SEMAPHOREB (0x00000014)
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC86F_SEMAPHOREC (0x00000018)
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
#define NVC86F_SEMAPHORED (0x0000001C)
#define NVC86F_SEMAPHORED_OPERATION 4:0
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION 30:27
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC86F_SEMAPHORED_FORMAT 31:31
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC86F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
@@ -206,67 +139,31 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
#define NVC86F_SET_REFERENCE (0x00000050)
#define NVC86F_SET_REFERENCE_COUNT 31:0
#define NVC86F_SEM_ADDR_LO (0x0000005c)
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
#define NVC86F_SEM_ADDR_HI (0x00000060)
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC86F_SEM_EXECUTE (0x0000006c)
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC86F_WFI (0x00000078)
#define NVC86F_WFI_SCOPE 0:0
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC86F_WFI_SCOPE_ALL 0x00000001
#define NVC86F_YIELD (0x00000080)
#define NVC86F_YIELD_OP 1:0
#define NVC86F_YIELD_OP_NOP 0x00000000
#define NVC86F_YIELD_OP_TSG 0x00000003
#define NVC86F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC86F_QUADRO_VERIFY (0x000000a0)


/* GPFIFO entry format */
#define NVC86F_GP_ENTRY__SIZE 8
@@ -291,85 +188,4 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004

/* dma method formats */
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC86F_DMA_METHOD_ADDRESS 11:0
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC86F_DMA_TERT_OP 17:16
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
#define NVC86F_DMA_METHOD_COUNT 28:16
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_SEC_OP 31:29
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC86F_DMA_INCR_ADDRESS 11:0
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
#define NVC86F_DMA_INCR_COUNT 28:16
#define NVC86F_DMA_INCR_OPCODE 31:29
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC86F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_NONINCR_COUNT 28:16
#define NVC86F_DMA_NONINCR_OPCODE 31:29
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC86F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_ONEINCR_COUNT 28:16
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC86F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC86F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC86F_DMA_IMMD_ADDRESS 11:0
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_IMMD_OPCODE 31:29
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC86F_DMA_ADDRESS 12:2
#define NVC86F_DMA_SUBCH 15:13
#define NVC86F_DMA_OPCODE3 17:16
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
#define NVC86F_DMA_COUNT 28:18
#define NVC86F_DMA_OPCODE 31:29
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC86F_DMA_DATA 31:0
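
The N:M notation above names the high and low bits of a field within a 32-bit method header. A hedged sketch of how such a header packs together for an incrementing method (the helper is illustrative, not part of the header; it assumes, as the 11:0 ADDRESS field suggests, that the byte-offset method address is shifted down by 2 to dword granularity):

/* Illustrative packing of an incrementing method header from the
 * NVC86F_DMA_INCR_* fields above. */
static inline NvU32 nvc86f_incr_header(NvU32 method, NvU32 subch, NvU32 count)
{
    return ((NvU32)0x1 << 29) | /* NVC86F_DMA_INCR_OPCODE_VALUE, bits 31:29 */
           (count << 16) |      /* NVC86F_DMA_INCR_COUNT, bits 28:16 */
           (subch << 13) |      /* NVC86F_DMA_INCR_SUBCHANNEL, bits 15:13 */
           (method >> 2);       /* NVC86F_DMA_INCR_ADDRESS, bits 11:0 */
}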
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clc86f_h_ */
|
||||
#endif // __gh100_clc86f_h__
|
||||
|
||||
@@ -1,160 +1,46 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
#ifndef __gh100_clc8b5_h__
#define __gh100_clc8b5_h__

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/



#include "nvtypes.h"

#ifndef _clc8b5_h_
#define _clc8b5_h_

#ifdef __cplusplus
extern "C" {
#endif

#define HOPPER_DMA_COPY_A (0x0000C8B5)

typedef volatile struct _clc8b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x6F];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved10[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved11[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved12[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved13[0x3BA];
} hopper_dma_copy_aControlPio;

#define NVC8B5_NOP (0x00000100)
#define NVC8B5_NOP_PARAMETER 31:0
#define NVC8B5_PM_TRIGGER (0x00000140)
#define NVC8B5_PM_TRIGGER_V 31:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define HOPPER_DMA_COPY_A (0x0000C8B5)
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -167,80 +53,41 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
@@ -249,41 +96,11 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC8B5_PITCH_IN (0x00000410)
#define NVC8B5_PITCH_IN_VALUE 31:0
#define NVC8B5_PITCH_OUT (0x00000414)
#define NVC8B5_PITCH_OUT_VALUE 31:0
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC8B5_LINE_COUNT (0x0000041C)
#define NVC8B5_LINE_COUNT_VALUE 31:0
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
@@ -299,132 +116,18 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_DST_WIDTH (0x00000710)
#define NVC8B5_SET_DST_WIDTH_V 31:0
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
#define NVC8B5_SET_DST_HEIGHT_V 31:0
#define NVC8B5_SET_DST_DEPTH (0x00000718)
#define NVC8B5_SET_DST_DEPTH_V 31:0
#define NVC8B5_SET_DST_LAYER (0x0000071C)
#define NVC8B5_SET_DST_LAYER_V 31:0
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
#define NVC8B5_SET_DST_ORIGIN_X 15:0
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
#define NVC8B5_SET_SRC_WIDTH_V 31:0
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
#define NVC8B5_SET_SRC_DEPTH_V 31:0
#define NVC8B5_SET_SRC_LAYER (0x00000738)
#define NVC8B5_SET_SRC_LAYER_V 31:0
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC8B5_PM_TRIGGER_END (0x00001114)
#define NVC8B5_PM_TRIGGER_END_V 31:0

#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc8b5_h

#endif // __gh100_clc8b5_h__

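The HOPPER_DMA_COPY_A methods above are driven through a channel pushbuffer. Below is a minimal sketch of a one-line (1D) virtual-to-virtual copy, assuming a hypothetical push_method() helper that emits one (method, data) pair; it is illustrative only, not the driver's own submission path:

/* Minimal sketch: 1D copy on HOPPER_DMA_COPY_A. push_method() is a
 * hypothetical pushbuffer helper, not part of these headers; field
 * positions follow the NVC8B5_* defines above. SRC_TYPE/DST_TYPE
 * default to 0 (VIRTUAL) and MULTI_LINE_ENABLE to 0 (FALSE). */
#include <stdint.h>

void push_method(uint32_t method, uint32_t data); /* assumed to exist elsewhere */

static void clc8b5_copy_1d(uint64_t src_va, uint64_t dst_va, uint32_t bytes)
{
    push_method(NVC8B5_OFFSET_IN_UPPER,  (uint32_t)(src_va >> 32)); /* UPPER is 24:0 */
    push_method(NVC8B5_OFFSET_IN_LOWER,  (uint32_t)src_va);
    push_method(NVC8B5_OFFSET_OUT_UPPER, (uint32_t)(dst_va >> 32));
    push_method(NVC8B5_OFFSET_OUT_LOWER, (uint32_t)dst_va);
    push_method(NVC8B5_LINE_LENGTH_IN,   bytes);
    push_method(NVC8B5_LINE_COUNT,       1);
    push_method(NVC8B5_LAUNCH_DMA,
                (0x2u << 0)    /* DATA_TRANSFER_TYPE_NON_PIPELINED (bits 1:0) */
              | (0x1u << 7)    /* SRC_MEMORY_LAYOUT_PITCH (bit 7) */
              | (0x1u << 8));  /* DST_MEMORY_LAYOUT_PITCH (bit 8) */
}
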
@@ -1,84 +1,42 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/


#ifndef _clc96f_h_
#define _clc96f_h_

#ifdef __cplusplus
extern "C" {
#endif

#include "nvtypes.h"

/* class BLACKWELL_CHANNEL_GPFIFO */
/*
* Documentation for BLACKWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Note there is no .mfs file for this class.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __gb100_clc96f_h__
#define __gb100_clc96f_h__

#define BLACKWELL_CHANNEL_GPFIFO_A (0x0000C96F)

#define NVC96F_TYPEDEF BLACKWELL_CHANNELChannelGPFifoA

/* dma flow control data structure */
typedef volatile struct Nvc96fControl_struct {
NvU32 Ignored00[0x23]; /* 0000-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored01[0x5c];
} Nvc96fControl, BlackwellAControlGPFifo;

/* fields and values */
#define NVC96F_NUMBER_OF_SUBCHANNELS (8)
#define NVC96F_SET_OBJECT (0x00000000)
#define NVC96F_SET_OBJECT_NVCLASS 15:0
#define NVC96F_SET_OBJECT_ENGINE 20:16
#define NVC96F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC96F_NOP (0x00000008)
#define NVC96F_NOP_HANDLE 31:0
#define NVC96F_NON_STALL_INTERRUPT (0x00000020)
#define NVC96F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC96F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC96F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC96F_MEM_OP_A (0x00000028)
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
@@ -86,9 +44,6 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_B (0x0000002c)
#define NVC96F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC96F_MEM_OP_C (0x00000030)
#define NVC96F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
@@ -97,130 +52,38 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.

#define NVC96F_MEM_OP_D (0x00000034)
#define NVC96F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_D_OPERATION 31:27
#define NVC96F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC96F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
#define NVC96F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC96F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC96F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE 0x00000011
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_COH_INVALIDATE 0x00000012
#define NVC96F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC96F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001

#define NVC96F_SEM_ADDR_LO (0x0000005c)
#define NVC96F_SEM_ADDR_LO_OFFSET 31:2
#define NVC96F_SEM_ADDR_HI (0x00000060)
#define NVC96F_SEM_ADDR_HI_OFFSET 24:0
#define NVC96F_SEM_PAYLOAD_LO (0x00000064)
#define NVC96F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC96F_SEM_PAYLOAD_HI (0x00000068)
#define NVC96F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC96F_SEM_EXECUTE (0x0000006c)
#define NVC96F_SEM_EXECUTE_OPERATION 2:0
#define NVC96F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC96F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC96F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK 18:18
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_EN 0x00000001
#define NVC96F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC96F_SEM_EXECUTE_REDUCTION 30:27
#define NVC96F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC96F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC96F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC96F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC96F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC96F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC96F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC96F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC96F_WFI (0x00000078)
#define NVC96F_WFI_SCOPE 0:0
#define NVC96F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC96F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC96F_WFI_SCOPE_ALL 0x00000001
#define NVC96F_YIELD (0x00000080)
#define NVC96F_YIELD_OP 1:0
#define NVC96F_YIELD_OP_NOP 0x00000000
#define NVC96F_YIELD_OP_TSG 0x00000003
#define NVC96F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC96F_CLEAR_FAULTED_HANDLE 30:0
#define NVC96F_CLEAR_FAULTED_TYPE 31:31
#define NVC96F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC96F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001


/* GPFIFO entry format */
#define NVC96F_GP_ENTRY__SIZE 8
@@ -245,85 +108,4 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004

/* dma method formats */
#define NVC96F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC96F_DMA_METHOD_ADDRESS 11:0
#define NVC96F_DMA_SUBDEVICE_MASK 15:4
#define NVC96F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC96F_DMA_TERT_OP 17:16
#define NVC96F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC96F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC96F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC96F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC96F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC96F_DMA_METHOD_COUNT_OLD 28:18
#define NVC96F_DMA_METHOD_COUNT 28:16
#define NVC96F_DMA_IMMD_DATA 28:16
#define NVC96F_DMA_SEC_OP 31:29
#define NVC96F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC96F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC96F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC96F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC96F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC96F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC96F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC96F_DMA_INCR_ADDRESS 11:0
#define NVC96F_DMA_INCR_SUBCHANNEL 15:13
#define NVC96F_DMA_INCR_COUNT 28:16
#define NVC96F_DMA_INCR_OPCODE 31:29
#define NVC96F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC96F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC96F_DMA_NONINCR_ADDRESS 11:0
#define NVC96F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC96F_DMA_NONINCR_COUNT 28:16
#define NVC96F_DMA_NONINCR_OPCODE 31:29
#define NVC96F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC96F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC96F_DMA_ONEINCR_ADDRESS 11:0
#define NVC96F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC96F_DMA_ONEINCR_COUNT 28:16
#define NVC96F_DMA_ONEINCR_OPCODE 31:29
#define NVC96F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC96F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC96F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC96F_DMA_IMMD_ADDRESS 11:0
#define NVC96F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC96F_DMA_IMMD_DATA 28:16
#define NVC96F_DMA_IMMD_OPCODE 31:29
#define NVC96F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC96F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC96F_DMA_ENDSEG_OPCODE 31:29
#define NVC96F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC96F_DMA_ADDRESS 12:2
#define NVC96F_DMA_SUBCH 15:13
#define NVC96F_DMA_OPCODE3 17:16
#define NVC96F_DMA_OPCODE3_NONE (0x00000000)
#define NVC96F_DMA_COUNT 28:18
#define NVC96F_DMA_OPCODE 31:29
#define NVC96F_DMA_OPCODE_METHOD (0x00000000)
#define NVC96F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC96F_DMA_DATA 31:0

#ifdef __cplusplus
}; /* extern "C" */
#endif

#endif /* _clc96f_h_ */
#endif // __gb100_clc96f_h__

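For the NVC96F host methods above, a semaphore release is a short method sequence: address, payload, then SEM_EXECUTE. Below is a minimal sketch of a 64-bit payload release, again assuming the same hypothetical push_method() helper as in the earlier sketches:

/* Minimal sketch: 64-bit semaphore release on BLACKWELL_CHANNEL_GPFIFO_A.
 * push_method() is a hypothetical pushbuffer helper, not part of these
 * headers; field positions follow the NVC96F_SEM_* defines above. */
#include <stdint.h>

void push_method(uint32_t method, uint32_t data); /* assumed to exist elsewhere */

static void nvc96f_sem_release_64(uint64_t sem_va, uint64_t payload)
{
    push_method(NVC96F_SEM_ADDR_LO,    (uint32_t)sem_va);         /* OFFSET 31:2, 4-byte aligned */
    push_method(NVC96F_SEM_ADDR_HI,    (uint32_t)(sem_va >> 32)); /* OFFSET 24:0 */
    push_method(NVC96F_SEM_PAYLOAD_LO, (uint32_t)payload);
    push_method(NVC96F_SEM_PAYLOAD_HI, (uint32_t)(payload >> 32));
    push_method(NVC96F_SEM_EXECUTE,
                0x1u             /* OPERATION_RELEASE (bits 2:0) */
              | (0x1u << 24));   /* PAYLOAD_SIZE_64BIT (bit 24) */
}
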
@@ -1,460 +1,29 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/




#include "nvtypes.h"

#ifndef _clc9b5_h_
#define _clc9b5_h_

#ifdef __cplusplus
extern "C" {
#endif
#ifndef __gb100_clc9b5_h__
#define __gb100_clc9b5_h__

#define BLACKWELL_DMA_COPY_A (0x0000C9B5)

typedef volatile struct _clc9b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x10];
NvV32 SetCompressionParameters; // 0x00000580 - 0x00000583
NvV32 SetDecompressOutLength; // 0x00000584 - 0x00000587
NvV32 SetDecompressOutLengthAddrUpper; // 0x00000588 - 0x0000058B
NvV32 SetDecompressOutLengthAddrLower; // 0x0000058C - 0x0000058F
NvV32 SetDecompressChecksum; // 0x00000590 - 0x00000593
NvV32 Reserved10[0x5A];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved11[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved12[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved13[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved14[0x3BA];
} blackwell_dma_copy_aControlPio;

#define NVC9B5_NOP (0x00000100)
#define NVC9B5_NOP_PARAMETER 31:0
#define NVC9B5_PM_TRIGGER (0x00000140)
#define NVC9B5_PM_TRIGGER_V 31:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define NVC9B5_SET_SEMAPHORE_A (0x00000240)
#define NVC9B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC9B5_SET_SEMAPHORE_B (0x00000244)
#define NVC9B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC9B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC9B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC9B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC9B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC9B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC9B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC9B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC9B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC9B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC9B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC9B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC9B5_LAUNCH_DMA (0x00000300)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE 11:11
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC9B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC9B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC9B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC9B5_OFFSET_IN_UPPER_UPPER 24:0
|
||||
#define NVC9B5_OFFSET_IN_LOWER (0x00000404)
|
||||
#define NVC9B5_OFFSET_IN_LOWER_VALUE 31:0
|
||||
#define NVC9B5_OFFSET_OUT_UPPER (0x00000408)
|
||||
#define NVC9B5_OFFSET_OUT_UPPER_UPPER 24:0
|
||||
#define NVC9B5_OFFSET_OUT_LOWER (0x0000040C)
|
||||
#define NVC9B5_OFFSET_OUT_LOWER_VALUE 31:0
|
||||
#define NVC9B5_PITCH_IN (0x00000410)
|
||||
#define NVC9B5_PITCH_IN_VALUE 31:0
|
||||
#define NVC9B5_PITCH_OUT (0x00000414)
|
||||
#define NVC9B5_PITCH_OUT_VALUE 31:0
|
||||
#define NVC9B5_LINE_LENGTH_IN (0x00000418)
|
||||
#define NVC9B5_LINE_LENGTH_IN_VALUE 31:0
|
||||
#define NVC9B5_LINE_COUNT (0x0000041C)
|
||||
#define NVC9B5_LINE_COUNT_VALUE 31:0
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE (0x00000500)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE 0:0
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
|
||||
#define NVC9B5_SET_DECRYPT_IV0 (0x00000504)
|
||||
#define NVC9B5_SET_DECRYPT_IV0_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_IV1 (0x00000508)
|
||||
#define NVC9B5_SET_DECRYPT_IV1_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_IV2 (0x0000050C)
|
||||
#define NVC9B5_SET_DECRYPT_IV2_VALUE 31:0
|
||||
#define NVC9B5_RESERVED_SET_AESCOUNTER (0x00000510)
|
||||
#define NVC9B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS (0x00000580)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION 0:0
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_DECOMPRESS (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_COMPRESS (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO 3:1
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_DATA_ONLY (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK (0x00000002)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK_CHECKSUM (0x00000003)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_DEFLATE (0x00000004)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY_WITH_LONG_FETCH (0x00000005)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM 29:28
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_NONE (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_ADLER32 (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_CRC32 (0x00000002)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_SNAPPY_CRC (0x00000003)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH (0x00000584)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_V 31:0
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER (0x00000588)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER (0x0000058C)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_DECOMPRESS_CHECKSUM (0x00000590)
|
||||
#define NVC9B5_SET_DECOMPRESS_CHECKSUM_V 31:0
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_CONST_A (0x00000700)
|
||||
#define NVC9B5_SET_REMAP_CONST_A_V 31:0
|
||||
#define NVC9B5_SET_REMAP_CONST_B (0x00000704)
|
||||
#define NVC9B5_SET_REMAP_CONST_B_V 31:0
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS (0x00000708)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X 2:0
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y 6:4
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z 10:8
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W 14:12
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC9B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC9B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC9B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC9B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC9B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC9B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC9B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC9B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC9B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC9B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC9B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC9B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC9B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC9B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC9B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC9B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC9B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC9B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC9B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC9B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC9B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC9B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC9B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC9B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC9B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC9B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC9B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC9B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC9B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC9B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC9B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC9B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clc9b5_h
|
||||
|
||||
#endif // __gb100_clc9b5_h__
|
||||
|
||||
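The "HI:LO" pairs above are bit ranges within a 32-bit method payload, while the parenthesized constants are method offsets or field values. As an illustrative sketch only (the helper below is not part of the header; the driver itself uses NVIDIA's DRF-style macros for this), a field value can be masked and shifted into place from its bounds like so:

    // Hypothetical helper: clamp 'value' to the width of an HI:LO field and
    // shift it into position. The mask is computed in 64 bits so a full
    // 31:0 field does not overflow the shift.
    static inline NvU32 nvc9b5_pack_field(NvU32 value, unsigned hi, unsigned lo)
    {
        NvU32 mask = (NvU32)((1ULL << (hi - lo + 1)) - 1);
        return (value & mask) << lo;
    }

    // Example: a pipelined LAUNCH_DMA payload with a flush and pitch layouts.
    // launch = nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED, 1, 0) |
    //          nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE, 2, 2) |
    //          nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH, 7, 7) |
    //          nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH, 8, 8);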
@@ -151,6 +151,7 @@ typedef volatile struct _clcba2_tag0 {
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS              (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE                 (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED                        (0x0000001d)
#define NVCBA2_ERROR_SIZE_ZERO                                      (0x0000001e)

#ifdef __cplusplus
};     /* extern "C" */

@@ -43,4 +43,7 @@

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100               (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000               (0x00000001)

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B               (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B               (0x0000000B)
#endif /* _ctrl2080mc_h_ */
@@ -21,6 +21,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_fd_type.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_processors.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
@@ -59,7 +60,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
@@ -96,7 +96,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_heuristics.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_thrashing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
@@ -128,3 +127,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_file.c

@@ -50,7 +50,6 @@ NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)

NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
@@ -33,10 +33,12 @@
#include "uvm_va_block.h"
#include "uvm_tools.h"
#include "uvm_common.h"
#include "uvm_fd_type.h"
#include "uvm_linux_ioctl.h"
#include "uvm_hmm.h"
#include "uvm_mem.h"
#include "uvm_kvmalloc.h"
#include "uvm_test_file.h"

#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"

@@ -49,55 +51,9 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
    return (filp != NULL) && (filp->f_op == &uvm_fops);
}

uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp)
{
    unsigned long uptr;
    uvm_fd_type_t type;
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);

    switch (type) {
        case UVM_FD_UNINITIALIZED:
        case UVM_FD_INITIALIZING:
            UVM_ASSERT(!ptr);
            break;

        case UVM_FD_VA_SPACE:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_MM:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
            break;

        default:
            UVM_ASSERT(0);
    }

    if (ptr_val)
        *ptr_val = ptr;

    return type;
}

void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    if (uvm_fd_type(filp, &ptr) == type)
        return ptr;
    else
        return NULL;
    return uvm_file_is_nvidia_uvm(filp) && uvm_fd_type(filp, NULL) == UVM_FD_VA_SPACE;
}
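The removed uvm_fd_type() above reads a small tagged pointer: the fd type lives in the low bits of filp->private_data and the payload pointer in the remaining bits, which is only sound because every pointee is at least (1 << UVM_FD_TYPE_BITS)-aligned (that is what the BUILD_BUG_ONs enforce). A minimal standalone sketch of the encoding, with hypothetical names (the real constants moved to uvm_fd_type.h in this release):

    #define FD_TYPE_BITS 2UL
    #define FD_TYPE_MASK ((1UL << FD_TYPE_BITS) - 1)

    static inline unsigned long fd_encode(void *ptr, unsigned long type)
    {
        // Caller guarantees 'ptr' is (1 << FD_TYPE_BITS)-aligned and that
        // 'type' fits inside FD_TYPE_MASK, so the two never collide.
        return (unsigned long)ptr | type;
    }

    static inline unsigned long fd_decode(unsigned long uptr, void **ptr_out)
    {
        *ptr_out = (void *)(uptr & ~FD_TYPE_MASK);  // The payload pointer.
        return uptr & FD_TYPE_MASK;                 // The type tag.
    }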
static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
@@ -105,7 +61,6 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
    uvm_va_space_t *va_space;
    uvm_va_space_mm_t *va_space_mm;
    struct file *uvm_file;
    uvm_fd_type_t old_fd_type;
    struct mm_struct *mm;
    NV_STATUS status;

@@ -127,14 +82,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
        goto err;
    }

    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type != UVM_FD_UNINITIALIZED) {
        status = NV_ERR_IN_USE;
    status = uvm_fd_type_init(filp);
    if (status != NV_OK)
        goto err;
    }

    va_space_mm = &va_space->va_space_mm;
    uvm_spin_lock(&va_space_mm->lock);
@@ -173,13 +123,13 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
        break;
    }
    uvm_spin_unlock(&va_space_mm->lock);
    atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
    uvm_fd_type_set(filp, UVM_FD_MM, uvm_file);

    return NV_OK;

err_release_unlock:
    uvm_spin_unlock(&va_space_mm->lock);
    atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
    uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);

err:
    if (uvm_file)
@@ -240,7 +190,7 @@ static void uvm_release_deferred(void *data)
    // Since this function is only scheduled to run when uvm_release() fails
    // to trylock-acquire the pm.lock, the following acquisition attempt
    // is expected to block this thread, and cause it to remain blocked until
    // uvm_resume() releases the lock. As a result, the deferred release
    // kthread queue may stall for long periods of time.
    uvm_down_read(&g_uvm_global.pm.lock);

@@ -249,12 +199,43 @@ static void uvm_release_deferred(void *data)
    uvm_up_read(&g_uvm_global.pm.lock);
}

static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static void uvm_release_va_space(struct file *filp, uvm_va_space_t *va_space)
{
    int ret;

    filp->private_data = NULL;
    filp->f_mapping = NULL;

    // Because the kernel discards the status code returned from this release
    // callback, early exit in case of a pm.lock acquisition failure is not
    // an option. Instead, the teardown work normally performed synchronously
    // needs to be scheduled to run after uvm_resume() releases the lock.
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
        // Remove references to this inode from the address_space. This isn't
        // strictly necessary, as any CPU mappings of this file have already
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
        // which case the references are rendered stale.
        address_space_init_once(va_space->mapping);

        nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
        ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
        UVM_ASSERT(ret != 0);
    }
}

static void uvm_release_mm(struct file *filp, struct file *uvm_file)
{
    uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    struct mm_struct *mm = va_space_mm->mm;

    uvm_kvfree(filp->f_mapping);

    if (uvm_va_space_mm_enabled(va_space)) {
        uvm_va_space_mm_unregister(va_space);

@@ -269,46 +250,27 @@ static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static int uvm_release(struct inode *inode, struct file *filp)
{
    void *ptr;
    uvm_va_space_t *va_space;
    uvm_fd_type_t fd_type;
    int ret;
    uvm_fd_type_t fd_type = uvm_fd_type(filp, &ptr);

    fd_type = uvm_fd_type(filp, &ptr);
    UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
    if (fd_type == UVM_FD_UNINITIALIZED) {
        uvm_kvfree(filp->f_mapping);
        return 0;
    }
    else if (fd_type == UVM_FD_MM) {
        uvm_kvfree(filp->f_mapping);
        uvm_mm_release(filp, (struct file *)ptr);
        return 0;
    }
    switch (fd_type) {
        case UVM_FD_UNINITIALIZED:
            uvm_kvfree(filp->f_mapping);
            break;

    UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
    va_space = (uvm_va_space_t *)ptr;
    filp->private_data = NULL;
    filp->f_mapping = NULL;
        case UVM_FD_VA_SPACE:
            uvm_release_va_space(filp, (uvm_va_space_t *)ptr);
            break;

    // Because the kernel discards the status code returned from this release
    // callback, early exit in case of a pm.lock acquisition failure is not
    // an option. Instead, the teardown work normally performed synchronously
    // needs to be scheduled to run after uvm_resume() releases the lock.
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
        // Remove references to this inode from the address_space. This isn't
        // strictly necessary, as any CPU mappings of this file have already
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
        // which case the references are rendered stale.
        address_space_init_once(va_space->mapping);
        case UVM_FD_MM:
            uvm_release_mm(filp, (struct file *)ptr);
            break;

        nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
        ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
        UVM_ASSERT(ret != 0);
        case UVM_FD_TEST:
            uvm_test_file_release(filp, (uvm_test_file_t *)ptr);
            break;

        default:
            UVM_ASSERT_MSG(0, "Unexpected fd type: %d\n", fd_type);
    }

    return 0;
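uvm_release_va_space() above is an instance of a trylock-or-defer teardown: a ->release() callback cannot report failure, so when pm.lock is contended the destruction is queued instead of blocking. A generic sketch of the shape, assuming a standard kernel workqueue (release_or_defer(), teardown_work, and its handler are hypothetical names, not part of this driver):

    #include <linux/rwsem.h>
    #include <linux/workqueue.h>

    static void release_or_defer(struct rw_semaphore *pm_lock,
                                 struct work_struct *teardown_work)
    {
        if (down_read_trylock(pm_lock)) {
            // Fast path: the lock is free, so tear the object down
            // synchronously here, then drop the lock.
            up_read(pm_lock);
        }
        else {
            // Slow path: release() must still return, so queue the teardown
            // to run after the current lock holder (resume) drops the lock.
            schedule_work(teardown_work);
        }
    }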
@@ -829,6 +791,7 @@ static struct vm_operations_struct uvm_vm_ops_device_p2p =

static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
    void *fd_type_ptr;
    uvm_va_space_t *va_space;
    NV_STATUS status = uvm_global_get_status();
    int ret = 0;
@@ -837,9 +800,17 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    if (status != NV_OK)
        return -nv_status_to_errno(status);

    va_space = uvm_fd_va_space(filp);
    if (!va_space)
        return -EBADFD;
    switch (uvm_fd_type(filp, &fd_type_ptr)) {
        case UVM_FD_VA_SPACE:
            va_space = (uvm_va_space_t *)fd_type_ptr;
            break;

        case UVM_FD_TEST:
            return uvm_test_file_mmap((uvm_test_file_t *)fd_type_ptr, vma);

        default:
            return -EBADFD;
    }

    // When the VA space is associated with an mm, all vmas under the VA space
    // must come from that mm.
@@ -867,8 +838,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    }

    // If the PM lock cannot be acquired, disable the VMA and report success
    // to the caller. The caller is expected to determine whether the
    // map operation succeeded via an ioctl() call. This is necessary to
    // safely handle MAP_FIXED, which needs to complete atomically to prevent
    // the loss of the virtual address range.
    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
@@ -999,33 +970,40 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
    // attempt to be made. This is safe because other threads will have only had
    // a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
    // case.
    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type == UVM_FD_UNINITIALIZED) {
        status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
        if (status != NV_OK) {
            atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
            return status;
        }
    old_fd_type = uvm_fd_type_init_cas(filp);
    switch (old_fd_type) {
        case UVM_FD_UNINITIALIZED:
            status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
            if (status != NV_OK) {
                uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
                return status;
            }

        atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
    }
    else if (old_fd_type == UVM_FD_VA_SPACE) {
        va_space = uvm_va_space_get(filp);
            uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);
            break;

        if (params->flags != va_space->initialization_flags)
        case UVM_FD_VA_SPACE:
            va_space = uvm_va_space_get(filp);
            if (params->flags != va_space->initialization_flags)
                status = NV_ERR_INVALID_ARGUMENT;
            else
                status = NV_OK;

            break;

        case UVM_FD_MM:
        case UVM_FD_TEST:
            status = NV_ERR_INVALID_ARGUMENT;
        else
            status = NV_OK;
    }
    else if (old_fd_type == UVM_FD_MM) {
        status = NV_ERR_INVALID_ARGUMENT;
    }
    else {
        UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
        status = NV_ERR_BUSY_RETRY;
            break;

        case UVM_FD_INITIALIZING:
            status = NV_ERR_BUSY_RETRY;
            break;

        default:
            UVM_ASSERT(0);
            status = NV_ERR_INVALID_STATE; // Quiet compiler warnings
            break;
    }

    return status;
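The switch above is a one-time-initialization state machine: a single compare-and-swap decides which thread owns initialization, and every other caller observes either INITIALIZING (and retries) or a terminal type. A hedged sketch of what a helper like uvm_fd_type_init_cas() plausibly wraps, based on the cmpxchg sequence it replaces in this diff:

    // Returns the fd type observed before the attempt. Only the caller that
    // saw UVM_FD_UNINITIALIZED has claimed the INITIALIZING slot, and it must
    // later publish a terminal type (or roll back to UNINITIALIZED).
    static uvm_fd_type_t fd_type_init_cas_sketch(struct file *filp)
    {
        long prev = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                        UVM_FD_UNINITIALIZED,
                                        UVM_FD_INITIALIZING);
        return (uvm_fd_type_t)(prev & UVM_FD_TYPE_MASK);
    }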
@@ -1233,19 +1211,8 @@ static int uvm_init(void)
        goto error;
    }

    pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));

    if (uvm_enable_builtin_tests)
        pr_info("Built-in UVM tests are enabled. This is a security risk.\n");

    // After Open RM is released, both the enclosing "#if" and this comment
    // block should be removed, because the uvm_hmm_is_enabled_system_wide()
    // check is both necessary and sufficient for reporting functionality.
    // Until that time, however, we need to avoid advertising UVM's ability to
    // enable HMM functionality.

    if (uvm_hmm_is_enabled_system_wide())
        UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
        UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");

    return 0;

@@ -1274,8 +1241,6 @@ static void uvm_exit(void)
    uvm_global_exit();

    uvm_test_unload_state_exit();

    pr_info("Unloaded the UVM driver.\n");
}

static void __exit uvm_exit_entry(void)
@@ -1430,9 +1430,9 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
// UvmMigrate
//
// Migrates the backing of a given virtual address range to the specified
// destination processor. If any page in the VA range is unpopulated, it is
// populated at the destination processor. The migrated pages in the VA range
// are also mapped on the destination processor.
// destination processor's nearest memory. If any page in the VA range is
// unpopulated, it is populated at the destination processor. The migrated pages
// in the VA range are also mapped on the destination processor.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
@@ -2207,9 +2207,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input virtual range corresponds to
// system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
// reports that pageable memory access is supported, the behavior described
// below does not take effect, and read duplication will not be enabled for
// the input range.
// reports that pageable memory access is supported, or if a memoryless
// processor is present, the behavior described below does not take effect, and
// read duplication will not be enabled for the input range.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU.
@@ -2330,7 +2330,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmSetPreferredLocation
//
// Sets the preferred location for the given virtual address range to be the
// specified processor's memory.
// specified processor's nearest memory.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
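A hedged usage sketch for the semantics documented above (the parameter lists are abbreviated here because they vary across driver versions; consult the full prototypes in uvm.h):

    // Populate and map a range on the destination processor's nearest memory,
    // then pin future first-touch placement to the same processor.
    //
    //     status = UvmMigrate(base, length, /* destination UUID, ... */);
    //     status = UvmSetPreferredLocation(base, length, /* processor UUID, ... */);
    //
    // Both calls require base and length to be aligned to the smallest page
    // size supported by the CPU.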
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2024 NVIDIA Corporation
    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
                                                                                 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Ada covers 128 TB and that's the minimum size
@@ -80,10 +78,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2018-2024 NVIDIA Corporation
    Copyright (c) 2018-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
                                                                           (sizeof(dummy->fault_source.utlb_id) * 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Ampere covers 128 TB and that's the minimum
@@ -84,10 +82,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
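The uTLB assertion repeated in the Ada and Ampere hunks above is a general width check: a count can be stored in an N-byte hardware field only if it does not exceed 2^(8N). A standalone sketch of the same idiom:

    // 1ULL keeps the shift well-defined even for a 4-byte field (shift of 32).
    #define COUNT_FITS_IN_FIELD(count, field) \
        ((count) <= (1ULL << (sizeof(field) * 8)))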
@@ -189,7 +189,7 @@ static bool uvm_api_range_invalid(NvU64 base, NvU64 length)
}

// Some APIs can only enforce 4K alignment as it's the smallest GPU page size
// even when the smallest host page is larger (e.g. 64K on ppc64le).
// even when the smallest host page is larger.
static bool uvm_api_range_invalid_4k(NvU64 base, NvU64 length)
{
    return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_4K);
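A sketch of the alignment test that uvm_api_range_invalid_4k() delegates to, assuming the helper only needs a power-of-two alignment and a non-empty range (the real uvm_api_range_invalid_aligned() may apply additional bounds checks):

    static bool range_invalid_aligned_sketch(NvU64 base, NvU64 length, NvU64 alignment)
    {
        // OR-ing base and length tests both for misalignment with one mask.
        return length == 0 || ((base | length) & (alignment - 1)) != 0;
    }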
@@ -42,26 +42,11 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
           uvm_va_space_mm_enabled_system();
}

void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
    uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_init_va_space(va_space);
}

NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ibm_add_gpu() needs to be called even if ATS is disabled since it
        // sets parent_gpu->npu. Not setting parent_gpu->npu will result in
        // incorrect NVLink addresses. See dma_addr_to_gpu_addr().

        return uvm_ats_ibm_add_gpu(parent_gpu);
    }
    else if (UVM_ATS_SVA_SUPPORTED()) {
        if (g_uvm_global.ats.enabled)
            return uvm_ats_sva_add_gpu(parent_gpu);
    if (g_uvm_global.ats.enabled) {
        UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
        return uvm_ats_sva_add_gpu(parent_gpu);
    }

    return NV_OK;
@@ -69,38 +54,25 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)

void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ibm_remove_gpu() needs to be called even if ATS is disabled since
        // uvm_ibm_add_gpu() is called even in that case and
        // uvm_ibm_remove_gpu() needs to undo the work done by
        // uvm_ats_add_gpu() (gpu retained_count etc.).

        uvm_ats_ibm_remove_gpu(parent_gpu);
    }
    else if (UVM_ATS_SVA_SUPPORTED()) {
        if (g_uvm_global.ats.enabled)
            uvm_ats_sva_remove_gpu(parent_gpu);
    if (g_uvm_global.ats.enabled) {
        UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
        uvm_ats_sva_remove_gpu(parent_gpu);
    }
}

NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
    NV_STATUS status = NV_OK;

    UVM_ASSERT(gpu_va_space);

    if (!gpu_va_space->ats.enabled)
        return status;
        return NV_OK;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    uvm_assert_lockable_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_SPACE);

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_bind_gpu(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        status = uvm_ats_sva_bind_gpu(gpu_va_space);

    return status;
    return uvm_ats_sva_bind_gpu(gpu_va_space);
}

void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -110,10 +82,9 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return;

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unbind_gpu(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        uvm_ats_sva_unbind_gpu(gpu_va_space);
    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    uvm_ats_sva_unbind_gpu(gpu_va_space);
}

NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -127,6 +98,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return status;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    va_space = gpu_va_space->va_space;
    UVM_ASSERT(va_space);

@@ -138,10 +111,7 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (uvm_processor_mask_test(&va_space->ats.registered_gpu_va_spaces, gpu_id))
        return NV_ERR_INVALID_DEVICE;

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
    status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);

    if (status == NV_OK)
        uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@@ -159,25 +129,14 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    va_space = gpu_va_space->va_space;
    gpu_id = gpu_va_space->gpu->id;

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
    uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);

    uvm_va_space_down_write(va_space);
    uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
    uvm_va_space_up_write(va_space);
}

void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    // We can only reach here from the mmu_notifier callbacks and these callbacks
    // wouldn't have been registered if ATS wasn't enabled.
    UVM_ASSERT(g_uvm_global.ats.enabled);

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_invalidate(va_space, start, end);
}
@@ -26,12 +26,11 @@

#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
#define UVM_ATS_SUPPORTED() UVM_ATS_SVA_SUPPORTED()

typedef struct
{
@@ -43,12 +42,7 @@ typedef struct
    // being called in ats_compute_residency_mask().
    uvm_rw_semaphore_t lock;

    union
    {
        uvm_ibm_va_space_t ibm;

        uvm_sva_va_space_t sva;
    };
    uvm_sva_va_space_t sva;
} uvm_ats_va_space_t;

typedef struct
@@ -61,12 +55,7 @@ typedef struct

    NvU32 pasid;

    union
    {
        uvm_ibm_gpu_va_space_t ibm;

        uvm_sva_gpu_va_space_t sva;
    };
    uvm_sva_gpu_va_space_t sva;
} uvm_ats_gpu_va_space_t;

// Initializes driver-wide ATS state
@@ -74,11 +63,6 @@ typedef struct
// LOCKING: None
void uvm_ats_init(const UvmPlatformInfo *platform_info);

// Initializes ATS specific GPU state
//
// LOCKING: None
void uvm_ats_init_va_space(uvm_va_space_t *va_space);

// Enables ATS feature on the GPU.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
@@ -115,8 +99,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
//
// LOCKING: The VA space lock must be held in write mode.
//          mm has to be retained prior to calling this function.
//          current->mm->mmap_lock must be held in write mode iff
//          UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Disables ATS access for the gpu_va_space. Prior to calling this function,
@@ -124,19 +106,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and will acquire the VA space
//          lock, so neither lock must be held.
// LOCKING: This function will acquire the VA space lock, so it must not be
//          held.
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
//          interrupts disabled.
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);

#endif // __UVM_ATS_H__
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2023 NVIDIA Corporation
    Copyright (c) 2024-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -58,37 +58,6 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
    bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
    bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
    uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
        (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
        UVM_POPULATE_PERMISSIONS_INHERIT;


    // Request uvm_migrate_pageable() to touch the corresponding page after
    // population.
    // Under virtualization ATS provides two translations:
    // 1) guest virtual -> guest physical
    // 2) guest physical -> host physical
    //
    // The overall ATS translation will fault if either of those translations is
    // invalid. The pin_user_pages() call within uvm_migrate_pageable() call
    // below handles translation #1, but not #2. We don't know if we're running
    // as a guest, but in case we are we can force that translation to be valid
    // by touching the guest physical address from the CPU. If the translation
    // is not valid then the access will cause a hypervisor fault. Note that
    // dma_map_page() can't establish mappings used by GPU ATS SVA translations.
    // GPU accesses to host physical addresses obtained as a result of the
    // address translation request uses the CPU address space instead of the
    // IOMMU address space since the translated host physical address isn't
    // necessarily an IOMMU address. The only way to establish guest physical to
    // host physical mapping in the CPU address space is to touch the page from
    // the CPU.
    //
    // We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
    // VM_WRITE, meaning that the mappings are all granted write access on any
    // fault and that the kernel will never revoke them.
    // drivers/vfio/pci/vfio_pci_nvlink2.c enforces this. Thus we can assume
    // that a read fault is always sufficient to also enable write access on the
    // guest translation.

    uvm_migrate_args_t uvm_migrate_args =
    {
@@ -98,8 +67,8 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
        .dst_node_id                        = ats_context->residency_node,
        .start                              = start,
        .length                             = length,
        .populate_permissions               = populate_permissions,
        .touch                              = is_fault_service_type,
        .populate_permissions               = UVM_POPULATE_PERMISSIONS_INHERIT,
        .populate_flags                     = UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK,
        .skip_mapped                        = is_fault_service_type,
        .populate_on_cpu_alloc_failures     = is_fault_service_type,
        .populate_on_migrate_vma_failures   = is_fault_service_type,
@@ -115,6 +84,13 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
        .fail_on_unresolved_sto_errors      = !is_fault_service_type || is_prefetch_faults,
    };

    if (is_fault_service_type) {
        uvm_migrate_args.populate_permissions = (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY);

        // If we're faulting, let the GPU access special vmas
        uvm_migrate_args.populate_flags |= UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL;
    }

    UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));

    // We are trying to use migrate_vma API in the kernel (if it exists) to
@@ -139,9 +115,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
    uvm_ats_fault_invalidate_t *ats_invalidate;

    if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
    else
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;

    if (!ats_invalidate->tlb_batch_pending) {
        uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
@@ -533,8 +509,20 @@ static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
                                  access_type,
                                  UVM_ATS_SERVICE_TYPE_FAULTS,
                                  ats_context);
    if (status != NV_OK)
    if (status != NV_OK) {
        // This condition can occur if we unexpectedly fault on a vma that
        // doesn't support faulting (or at least doesn't support
        // pin_user_pages). This may be an incorrect mapping setup from the
        // vma's owning driver, a hardware bug, or just that the owning driver
        // didn't expect a device fault. Either way, we don't want to consider
        // this a global error so don't propagate it, but also don't indicate
        // that the faults were serviced. That way the caller knows to cancel
        // them precisely.
        if (status == NV_ERR_INVALID_ADDRESS)
            return NV_OK;

        return status;
    }

    uvm_page_mask_region_fill(faults_serviced_mask, region);

@@ -689,12 +677,14 @@ bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_v
        if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
            return true;

        prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
        prev = uvm_va_range_gmmu_mappable_prev(next);
    }
    else {
        // No VA range exists after address, so check the last VA range in the
        // tree.
        prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
        while (prev && !uvm_va_range_is_gmmu_mappable(prev))
            prev = uvm_va_range_gmmu_mappable_prev(prev);
    }

    return prev && (prev->node.end >= gmmu_region_base);
@@ -1,715 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2019 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "uvm_common.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
|
||||
#if UVM_IBM_NPU_SUPPORTED()
|
||||
|
||||
#include <linux/of.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <asm/pci-bridge.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
#define NPU_ATSD_REG_MAP_SIZE 32
|
||||
|
||||
// There are three 8-byte registers in each ATSD mapping:
|
||||
#define NPU_ATSD_REG_LAUNCH 0
|
||||
#define NPU_ATSD_REG_AVA 1
|
||||
#define NPU_ATSD_REG_STAT 2
|
||||
|
||||
// Fields within the NPU_ATSD_REG_LAUNCH register:
|
||||
|
||||
// "PRS" (process-scoped) bit. 1 means to limit invalidates to the specified
|
||||
// PASID.
|
||||
#define NPU_ATSD_REG_LAUNCH_PASID_ENABLE 13
|
||||
|
||||
// "PID" field. This specifies the PASID target of the invalidate.
|
||||
#define NPU_ATSD_REG_LAUNCH_PASID_VAL 38
|
||||
|
||||
// "IS" bit. 0 means the specified virtual address range will be invalidated. 1
|
||||
// means all entries will be invalidated.
|
||||
#define NPU_ATSD_REG_LAUNCH_INVAL_ALL 12
|
||||
|
||||
// "AP" field. This encodes the size of a range-based invalidate.
|
||||
#define NPU_ATSD_REG_LAUNCH_INVAL_SIZE 17
|
||||
|
||||
// "No flush" bit. 0 will trigger a flush (membar) from the GPU following the
|
||||
// invalidate, 1 will not.
|
||||
#define NPU_ATSD_REG_LAUNCH_FLUSH_DISABLE 39
|
||||
|
||||
// Helper to iterate over the active NPUs in the given VA space (all NPUs with
|
||||
// GPUs that have GPU VA spaces registered in this VA space).
|
||||
#define for_each_npu_index_in_va_space(npu_index, va_space) \
|
||||
for (({uvm_assert_rwlock_locked(&(va_space)->ats.ibm.rwlock); \
|
||||
(npu_index) = find_first_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS);}); \
|
||||
(npu_index) < NV_MAX_NPUS; \
|
||||
(npu_index) = find_next_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS, (npu_index) + 1))
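
As the assert baked into the macro indicates, callers must hold the VA space's IBM rwlock across the whole iteration. A short usage sketch, mirroring the invalidate path later in this file (process_npu() is a hypothetical placeholder):

// Iterate over the active NPUs under the reader lock.
unsigned long irq_flags;
size_t i;

uvm_read_lock_irqsave(&va_space->ats.ibm.rwlock, irq_flags);

for_each_npu_index_in_va_space(i, va_space)
    process_npu(&g_uvm_global.npus[i]);

uvm_read_unlock_irqrestore(&va_space->ats.ibm.rwlock, irq_flags);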

// An invalidate requires operating on one set of registers in each NPU. This
// struct tracks which register set (id) is in use per NPU for a given
// operation.
typedef struct
{
    NvU8 ids[NV_MAX_NPUS];
} uvm_atsd_regs_t;

// Get the index of the input npu pointer within UVM's global npus array
static size_t uvm_ibm_npu_index(uvm_ibm_npu_t *npu)
{
    size_t npu_index = npu - &g_uvm_global.npus[0];
    UVM_ASSERT(npu_index < ARRAY_SIZE(g_uvm_global.npus));
    return npu_index;
}

// Find an existing NPU matching pci_domain, or return an empty NPU slot if none
// is found. Returns NULL if no slots are available.
static uvm_ibm_npu_t *uvm_ibm_npu_find(int pci_domain)
{
    size_t i;
    uvm_ibm_npu_t *npu, *first_free = NULL;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for (i = 0; i < ARRAY_SIZE(g_uvm_global.npus); i++) {
        npu = &g_uvm_global.npus[i];
        if (npu->num_retained_gpus == 0) {
            if (!first_free)
                first_free = npu;
        }
        else if (npu->pci_domain == pci_domain) {
            return npu;
        }
    }

    return first_free;
}

static void uvm_ibm_npu_destroy(uvm_ibm_npu_t *npu)
{
    size_t i;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(npu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));

    for (i = 0; i < npu->atsd_regs.count; i++) {
        UVM_ASSERT(npu->atsd_regs.io_addrs[i]);
        iounmap(npu->atsd_regs.io_addrs[i]);
    }

    memset(npu, 0, sizeof(*npu));
}

static NV_STATUS uvm_ibm_npu_init(uvm_ibm_npu_t *npu, struct pci_dev *npu_dev)
{
    struct pci_controller *hose;
    size_t i, reg_count, reg_size = sizeof(npu->atsd_regs.io_addrs[0]);
    int ret;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(npu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));

    npu->pci_domain = pci_domain_nr(npu_dev->bus);

    if (!UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
        return NV_OK;

    hose = pci_bus_to_host(npu_dev->bus);

    ret = of_property_count_elems_of_size(hose->dn, "ibm,mmio-atsd", reg_size);
    if (ret < 0) {
        UVM_ERR_PRINT("Failed to query NPU %d ATSD register count: %d\n", npu->pci_domain, ret);
        return errno_to_nv_status(ret);
    }

    // For ATS to be enabled globally, we must have NPU ATSD registers
    reg_count = ret;
    if (reg_count == 0 || reg_count > UVM_MAX_ATSD_REGS) {
        UVM_ERR_PRINT("NPU %d has invalid ATSD register count: %zu\n", npu->pci_domain, reg_count);
        return NV_ERR_INVALID_STATE;
    }

    // Map the ATSD registers
    for (i = 0; i < reg_count; i++) {
        u64 phys_addr;
        __be64 __iomem *io_addr;
        ret = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", i, &phys_addr);
        UVM_ASSERT(ret == 0);

        io_addr = ioremap(phys_addr, NPU_ATSD_REG_MAP_SIZE);
        if (!io_addr) {
            uvm_ibm_npu_destroy(npu);
            return NV_ERR_NO_MEMORY;
        }

        npu->atsd_regs.io_addrs[npu->atsd_regs.count++] = io_addr;
    }

    return NV_OK;
}

NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    struct pci_dev *npu_dev = pnv_pci_get_npu_dev(parent_gpu->pci_dev, 0);
    uvm_ibm_npu_t *npu;
    NV_STATUS status;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (!npu_dev)
        return NV_OK;

    npu = uvm_ibm_npu_find(pci_domain_nr(npu_dev->bus));
    if (!npu) {
        // If this happens then we can't support the system configuration until
        // NV_MAX_NPUS is updated. Return the same error as when the number of
        // GPUs exceeds UVM_MAX_GPUS.
        UVM_ERR_PRINT("No more NPU slots available, update NV_MAX_NPUS\n");
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    if (npu->num_retained_gpus == 0) {
        status = uvm_ibm_npu_init(npu, npu_dev);
        if (status != NV_OK)
            return status;
    }

    // This npu field could be read concurrently by a thread in the ATSD
    // invalidate path. We don't need to provide ordering with those threads
    // because those invalidates won't apply to the GPU being added until a GPU
    // VA space on this GPU is registered.
    npu->atsd_regs.num_membars = max(npu->atsd_regs.num_membars, parent_gpu->num_hshub_tlb_invalidate_membars);

    parent_gpu->npu = npu;
    ++npu->num_retained_gpus;
    return NV_OK;
}

void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    uvm_ibm_npu_t *npu = parent_gpu->npu;
    uvm_parent_gpu_t *other_parent_gpu;
    NvU32 num_membars_new = 0;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (!npu)
        return;

    UVM_ASSERT(npu->num_retained_gpus > 0);
    if (--npu->num_retained_gpus == 0) {
        uvm_ibm_npu_destroy(npu);
    }
    else {
        // Re-calculate the membar count
        for_each_parent_gpu(other_parent_gpu) {
            // The current GPU being removed should've already been removed from
            // the global list.
            UVM_ASSERT(other_parent_gpu != parent_gpu);
            if (other_parent_gpu->npu == npu)
                num_membars_new = max(num_membars_new, other_parent_gpu->num_hshub_tlb_invalidate_membars);
        }

        UVM_ASSERT(num_membars_new > 0);
        npu->atsd_regs.num_membars = num_membars_new;
    }
}

#if UVM_ATS_IBM_SUPPORTED()

void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
    uvm_ibm_va_space_t *ibm_va_space;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    uvm_rwlock_irqsave_init(&ibm_va_space->rwlock, UVM_LOCK_ORDER_LEAF);
}

#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static void npu_release_dummy(struct npu_context *npu_context, void *va_mm)
{
    // See the comment on the call to pnv_npu2_init_context()
}

static NV_STATUS uvm_ats_ibm_register_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
    struct npu_context *npu_context;

    // pnv_npu2_init_context() registers current->mm with
    // mmu_notifier_register(). We need that to match the mm we passed to our
    // own mmu_notifier_register() for this VA space.
    if (current->mm != va_space->va_space_mm.mm)
        return NV_ERR_NOT_SUPPORTED;

    uvm_assert_mmap_lock_locked_write(current->mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    // pnv_npu2_init_context() doesn't handle being called multiple times for
    // the same GPU under the same mm, which could happen if multiple VA spaces
    // are created in this process. To handle that we pass the VA space pointer
    // as the callback parameter: the callback values are shared by all devices
    // under this mm, so pnv_npu2_init_context() enforces that the values match
    // the ones already registered to the mm.
    //
    // Otherwise we don't use the callback, since we have our own callback
    // registered under the va_space_mm that will be called at the same point
    // (mmu_notifier release).
    npu_context = pnv_npu2_init_context(gpu_va_space->gpu->parent->pci_dev,
                                        (MSR_DR | MSR_PR | MSR_HV),
                                        npu_release_dummy,
                                        va_space);
    if (IS_ERR(npu_context)) {
        int err = PTR_ERR(npu_context);

        // We'll get -EINVAL if the callback value (va_space) differs from the
        // one already registered to the npu_context associated with this mm.
        // That can only happen when multiple VA spaces attempt registration
        // within the same process, which is disallowed and should return
        // NV_ERR_NOT_SUPPORTED.
        if (err == -EINVAL)
            return NV_ERR_NOT_SUPPORTED;
        return errno_to_nv_status(err);
    }

    ibm_gpu_va_space->npu_context = npu_context;

    return NV_OK;
}

static void uvm_ats_ibm_unregister_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_gpu_va_space_state_t state;
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_va_space_t *ibm_va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;

    if (!ibm_gpu_va_space->npu_context)
        return;

    // va_space is guaranteed to not be NULL if ibm_gpu_va_space->npu_context is
    // not NULL.
    UVM_ASSERT(va_space);

    state = uvm_gpu_va_space_state(gpu_va_space);
    UVM_ASSERT(state == UVM_GPU_VA_SPACE_STATE_INIT || state == UVM_GPU_VA_SPACE_STATE_DEAD);

    ibm_va_space = &va_space->ats.ibm;

    // pnv_npu2_destroy_context() may in turn call mmu_notifier_unregister().
    // If uvm_va_space_mm_shutdown() is concurrently executing in another
    // thread, mmu_notifier_unregister() will wait for
    // uvm_va_space_mm_shutdown() to finish. uvm_va_space_mm_shutdown() takes
    // mmap_lock and the VA space lock, so we can't be holding those locks on
    // this path.
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);

    pnv_npu2_destroy_context(ibm_gpu_va_space->npu_context, gpu_va_space->gpu->parent->pci_dev);
    ibm_gpu_va_space->npu_context = NULL;
}

#else

static void uvm_ats_ibm_register_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
    uvm_ibm_va_space_t *ibm_va_space;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    uvm_assert_rwsem_locked_write(&va_space->lock);

    uvm_write_lock_irqsave(&ibm_va_space->rwlock);

    // If this is the first GPU VA space to use this NPU in the VA space, mark
    // the NPU as active so invalidates are issued to it.
    if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
        // If this is the first active NPU in the entire VA space, we have to
        // tell the kernel to send TLB invalidations to the IOMMU. See kernel
        // commit 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f for background.
        //
        // This is safe to do without holding mm_users high or mmap_lock.
        if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
            mm_context_add_copro(va_space->va_space_mm.mm);

        UVM_ASSERT(!test_bit(npu_index, ibm_va_space->npu_active_mask));
        __set_bit(npu_index, ibm_va_space->npu_active_mask);
    }
    else {
        UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
    }

    ++ibm_va_space->npu_ref_counts[npu_index];

    // As soon as this lock is dropped, invalidates on this VA space's mm may
    // begin issuing ATSDs to this NPU.
    uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);

    ibm_gpu_va_space->did_ibm_driver_init = true;
}

static void uvm_ats_ibm_unregister_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
    bool do_remove = false;
    uvm_ibm_va_space_t *ibm_va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;

    if (!ibm_gpu_va_space->did_ibm_driver_init)
        return;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    // Note that we aren't holding the VA space lock here, so another thread
    // could be in uvm_ats_ibm_register_gpu_va_space() for this same GPU right
    // now. The write lock and ref counts below will handle that case.

    // Once we return from this function with a bit cleared in the
    // npu_active_mask, we have to guarantee that this VA space no longer
    // accesses that NPU's ATSD registers. This is needed in case GPU unregister
    // needs to unmap those registers. We use the reader/writer lock to
    // guarantee this, which means that invalidations must not access the ATSD
    // registers outside of the lock.
    //
    // Future work: if we could synchronize_srcu() on the mmu_notifier SRCU we
    // might do that here instead to flush out all invalidates. That would allow
    // us to avoid taking a read lock in the invalidate path, though we'd have
    // to be careful when clearing the mask bit relative to the synchronize, and
    // we'd have to be careful in cases where this thread doesn't hold a
    // reference to mm_users.
    uvm_write_lock_irqsave(&ibm_va_space->rwlock);

    UVM_ASSERT(ibm_va_space->npu_ref_counts[npu_index] > 0);
    UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));

    --ibm_va_space->npu_ref_counts[npu_index];
    if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
        __clear_bit(npu_index, ibm_va_space->npu_active_mask);
        if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
            do_remove = true;
    }

    uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);

    if (do_remove) {
        // mm_context_remove_copro() must be called outside of the spinlock
        // because it may issue invalidates across CPUs in this mm. The
        // coprocessor count is atomically refcounted by that function, so it's
        // safe to call here even if another thread jumps in with a register and
        // calls mm_context_add_copro() between this thread's unlock and this
        // call.
        UVM_ASSERT(va_space->va_space_mm.mm);
        mm_context_remove_copro(va_space->va_space_mm.mm);
    }
}
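
The register/unregister pair above is an instance of a common refcount-plus-active-mask pattern: per-unit reference counts, a bitmap mirroring which counts are non-zero, and a one-time global enable/disable on the first/last user. A minimal standalone sketch of just that bookkeeping (hypothetical types, locking omitted):

#include <stdbool.h>

#define MAX_UNITS 8

struct activation_tracker {
    unsigned ref_counts[MAX_UNITS];
    unsigned long active_mask; // bit n set <=> ref_counts[n] > 0
};

// Returns true if this was the first user overall, i.e. the caller must do the
// one-time global enable (mm_context_add_copro() in the code above).
static bool tracker_get(struct activation_tracker *t, unsigned unit)
{
    bool first_overall = (t->active_mask == 0);

    if (t->ref_counts[unit]++ == 0)
        t->active_mask |= 1UL << unit;

    return first_overall;
}

// Returns true if this was the last user overall, i.e. the caller must do the
// one-time global disable (mm_context_remove_copro() above).
static bool tracker_put(struct activation_tracker *t, unsigned unit)
{
    if (--t->ref_counts[unit] == 0)
        t->active_mask &= ~(1UL << unit);

    return t->active_mask == 0;
}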

#endif // UVM_ATS_IBM_SUPPORTED_IN_KERNEL()

static mm_context_id_t va_space_pasid(uvm_va_space_t *va_space)
{
    struct mm_struct *mm = va_space->va_space_mm.mm;
    UVM_ASSERT(mm);
    return mm->context.id;
}

NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(gpu_va_space->ats.enabled);
    UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
    UVM_ASSERT(va_space->va_space_mm.mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    status = uvm_ats_ibm_register_gpu_va_space_kernel(gpu_va_space);
#else
    uvm_ats_ibm_register_gpu_va_space_driver(gpu_va_space);
#endif

    gpu_va_space->ats.pasid = (NvU32) va_space_pasid(gpu_va_space->va_space);

    return status;
}

void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    uvm_ats_ibm_unregister_gpu_va_space_kernel(gpu_va_space);
#else
    uvm_ats_ibm_unregister_gpu_va_space_driver(gpu_va_space);
#endif

    gpu_va_space->ats.pasid = -1U;
}

#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()

// Find any available ATSD register set in this NPU and return that index. This
// will busy wait until a register set is free.
static NvU8 atsd_reg_acquire(uvm_ibm_npu_t *npu)
{
    uvm_spin_loop_t spin;
    size_t i;
    bool first = true;

    while (1) {
        // Using for_each_clear_bit is racy, since the bits could change at any
        // point. That's ok since we'll either just retry or use a real atomic
        // to lock the bit. Checking for clear bits first avoids spamming
        // atomics in the contended case.
        for_each_clear_bit(i, npu->atsd_regs.locks, npu->atsd_regs.count) {
            if (!test_and_set_bit_lock(i, npu->atsd_regs.locks))
                return (NvU8)i;
        }

        // Back off and try again, avoiding the overhead of initializing the
        // tracking timers unless we need them.
        if (first) {
            uvm_spin_loop_init(&spin);
            first = false;
        }
        else {
            UVM_SPIN_LOOP(&spin);
        }
    }
}

static void atsd_reg_release(uvm_ibm_npu_t *npu, NvU8 reg)
{
    UVM_ASSERT(reg < npu->atsd_regs.count);
    UVM_ASSERT(test_bit(reg, npu->atsd_regs.locks));
    clear_bit_unlock(reg, npu->atsd_regs.locks);
}
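
test_and_set_bit_lock() and clear_bit_unlock() give each bit in the bitmap the semantics of a tiny spinlock with acquire/release ordering. A rough userspace analogue using GCC/Clang builtin atomics (a sketch of the idea, not the kernel implementation):

#include <stdbool.h>

static unsigned long lock_bits;

static bool try_lock_bit(int n)
{
    unsigned long mask = 1UL << n;

    // fetch_or returns the previous value; the lock is acquired only if the
    // bit was previously clear.
    return !(__atomic_fetch_or(&lock_bits, mask, __ATOMIC_ACQUIRE) & mask);
}

static void unlock_bit(int n)
{
    __atomic_fetch_and(&lock_bits, ~(1UL << n), __ATOMIC_RELEASE);
}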

static __be64 atsd_reg_read(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset)
{
    __be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
    UVM_ASSERT(reg < npu->atsd_regs.count);
    return __raw_readq(io_addr);
}

static void atsd_reg_write(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset, NvU64 val)
{
    __be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
    UVM_ASSERT(reg < npu->atsd_regs.count);
    __raw_writeq_be(val, io_addr);
}

// Acquire a set of registers in each NPU which is active in va_space
static void atsd_regs_acquire(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        regs->ids[i] = atsd_reg_acquire(&g_uvm_global.npus[i]);
}

static void atsd_regs_release(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        atsd_reg_release(&g_uvm_global.npus[i], regs->ids[i]);
}

// Write the provided value to each NPU active in va_space at the provided
// register offset.
static void atsd_regs_write(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, size_t offset, NvU64 val)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        atsd_reg_write(&g_uvm_global.npus[i], regs->ids[i], offset, val);
}

// Wait for all prior operations issued to active NPUs in va_space on the given
// registers to finish.
static void atsd_regs_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    uvm_spin_loop_t spin;
    size_t i;
    for_each_npu_index_in_va_space(i, va_space) {
        UVM_SPIN_WHILE(atsd_reg_read(&g_uvm_global.npus[i], regs->ids[i], NPU_ATSD_REG_STAT), &spin)
            ;
    }
}

// Encode an invalidate targeting the given pasid and the given size for the
// NPU_ATSD_REG_LAUNCH register. The target address is encoded separately.
//
// psize must be one of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A
// psize of MMU_PAGE_COUNT means to invalidate the entire address space.
static NvU64 atsd_get_launch_val(mm_context_id_t pasid, int psize)
{
    NvU64 val = 0;

    val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE);
    val |= pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL);

    if (psize == MMU_PAGE_COUNT) {
        val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_INVAL_ALL);
    }
    else {
        // The NPU registers do not support arbitrary sizes
        UVM_ASSERT(psize == MMU_PAGE_64K || psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
        val |= (NvU64)mmu_get_ap(psize) << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_INVAL_SIZE);
    }

    return val;
}

// Return the encoded size to use for an ATSD targeting the given range, in one
// of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A return value of
// MMU_PAGE_COUNT means the entire address space must be invalidated.
//
// start is an in/out parameter. On return start will be set to the aligned
// starting address to use for the ATSD. end is inclusive.
static int atsd_calc_size(NvU64 *start, NvU64 end)
{
    // ATSDs have high latency, so we prefer to over-invalidate rather than
    // issue multiple precise invalidates. Supported sizes are only 64K, 2M, and
    // 1G.

    *start = UVM_ALIGN_DOWN(*start, SZ_64K);
    end = UVM_ALIGN_DOWN(end, SZ_64K);
    if (*start == end)
        return MMU_PAGE_64K;

    *start = UVM_ALIGN_DOWN(*start, SZ_2M);
    end = UVM_ALIGN_DOWN(end, SZ_2M);
    if (*start == end)
        return MMU_PAGE_2M;

    *start = UVM_ALIGN_DOWN(*start, SZ_1G);
    end = UVM_ALIGN_DOWN(end, SZ_1G);
    if (*start == end)
        return MMU_PAGE_1G;

    return MMU_PAGE_COUNT;
}
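
A worked example of the fall-through logic above: both bounds are aligned down at each supported size, and the first size at which they land on the same aligned base covers the whole inclusive range with one ATSD. The standalone check below encodes two such cases (illustration only):

#include <assert.h>
#include <stdint.h>

#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
    // [0x12345, 0x1FFFF]: both bounds fall in the same 64K page, so one 64K
    // ATSD at the aligned start 0x10000 covers the whole range.
    assert(ALIGN_DOWN(0x12345ULL, 0x10000) == ALIGN_DOWN(0x1FFFFULL, 0x10000));

    // [0x12345, 0x42345]: spans multiple 64K pages but a single 2M page, so
    // atsd_calc_size() falls through to the 2M case with *start rounded to 0.
    // Aligning down at 64K and then at 2M gives the same result as aligning
    // down at 2M directly, so the in-place updates don't change the outcome.
    assert(ALIGN_DOWN(0x12345ULL, 0x10000) != ALIGN_DOWN(0x42345ULL, 0x10000));
    assert(ALIGN_DOWN(0x12345ULL, 0x200000) == ALIGN_DOWN(0x42345ULL, 0x200000));

    return 0;
}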

// Issue an ATSD to all NPUs and wait for completion
static void atsd_launch_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 val)
{
    atsd_regs_write(va_space, regs, NPU_ATSD_REG_LAUNCH, val);
    atsd_regs_wait(va_space, regs);
}

// Issue and wait for the required membars following an invalidate
static void atsd_issue_membars(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    NvU32 num_membars = 0;

    // These membars are issued using ATSDs which target a reserved PASID of 0.
    // That PASID is valid on the GPU in order for the membar to be valid, but
    // 0 will never be used by the kernel for an actual address space so the
    // ATSD won't actually invalidate any entries.
    NvU64 val = atsd_get_launch_val(0, MMU_PAGE_COUNT);

    for_each_npu_index_in_va_space(i, va_space) {
        uvm_ibm_npu_t *npu = &g_uvm_global.npus[i];
        num_membars = max(num_membars, npu->atsd_regs.num_membars);
    }

    for (i = 0; i < num_membars; i++)
        atsd_launch_wait(va_space, regs, val);
}

static void uvm_ats_ibm_invalidate_all(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), MMU_PAGE_COUNT);
    atsd_launch_wait(va_space, regs, val);
    atsd_issue_membars(va_space, regs);
}

static void uvm_ats_ibm_invalidate_range(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 start, int psize)
{
    NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), psize);

    // Barriers are expensive, so write all address registers first then do a
    // single barrier for all of them.
    atsd_regs_write(va_space, regs, NPU_ATSD_REG_AVA, start);
    eieio();
    atsd_launch_wait(va_space, regs, val);
    atsd_issue_membars(va_space, regs);
}

#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()

void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
    unsigned long irq_flags;
    uvm_atsd_regs_t regs;
    NvU64 atsd_start = start;
    int psize = atsd_calc_size(&atsd_start, end);
    uvm_ibm_va_space_t *ibm_va_space = &va_space->ats.ibm;

    BUILD_BUG_ON(order_base_2(UVM_MAX_ATSD_REGS) > 8*sizeof(regs.ids[0]));

    // We must hold this lock in at least read mode when accessing NPU
    // registers. See the comment in uvm_ats_ibm_unregister_gpu_va_space_driver.
    uvm_read_lock_irqsave(&ibm_va_space->rwlock, irq_flags);

    if (!bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS)) {
        atsd_regs_acquire(va_space, &regs);

        if (psize == MMU_PAGE_COUNT)
            uvm_ats_ibm_invalidate_all(va_space, &regs);
        else
            uvm_ats_ibm_invalidate_range(va_space, &regs, atsd_start, psize);

        atsd_regs_release(va_space, &regs);
    }

    uvm_read_unlock_irqrestore(&ibm_va_space->rwlock, irq_flags);
#else
    UVM_ASSERT_MSG(0, "This function should not be called on this kernel version\n");
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
}

#endif // UVM_ATS_IBM_SUPPORTED
#endif // UVM_IBM_NPU_SUPPORTED

@@ -1,266 +0,0 @@
/*******************************************************************************
    Copyright (c) 2018-2019 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_ATS_IBM_H__
#define __UVM_ATS_IBM_H__

#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"

#if defined(NVCPU_PPC64LE) && defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
    #include <asm/mmu.h>
    #if defined(NV_MAX_NPUS)
        #define UVM_IBM_NPU_SUPPORTED() 1
    #else
        #define UVM_IBM_NPU_SUPPORTED() 0
    #endif
#else
    #define UVM_IBM_NPU_SUPPORTED() 0
#endif

#if defined(NV_ASM_OPAL_API_H_PRESENT)
    // For OPAL_NPU_INIT_CONTEXT
    #include <asm/opal-api.h>
#endif

// Timeline of kernel changes:
//
// 0) Before 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c
//    - No NPU-ATS code existed, nor did the OPAL_NPU_INIT_CONTEXT firmware
//      call.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Not defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Not defined
//    - ATS support type                                None
//
// 1) NPU ATS code added: 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c, v4.12
//    (2017-04-03)
//    - This commit added initial support for NPU ATS, including the necessary
//      OPAL firmware calls. This support was developmental and required
//      several bug fixes before it could be used in production.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                None
//
// 2) NPU ATS code fixed: a1409adac748f0db655e096521bbe6904aadeb98, v4.17
//    (2018-04-11)
//    - This commit changed the function signature for pnv_npu2_init_context's
//      callback parameter. Since all required bug fixes went in prior to this
//      change, we can use the callback signature as a flag to indicate
//      whether the PPC arch layer in the kernel supports ATS in production.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                Kernel
//
// 3) NPU ATS code removed: 7eb3cf761927b2687164e182efa675e6c09cfe44, v5.3
//    (2019-06-25)
//    - This commit removed NPU-ATS support from the PPC arch layer, so the
//      driver needs to handle things instead. pnv_npu2_init_context is no
//      longer present, so we use OPAL_NPU_INIT_CONTEXT to differentiate
//      between this state and scenario #0.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Not defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                Driver
//
#if defined(NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID)
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 1
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#elif !defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT) && defined(OPAL_NPU_INIT_CONTEXT) && UVM_CAN_USE_MMU_NOTIFIERS()
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 1
#else
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#endif

#define UVM_ATS_IBM_SUPPORTED() (UVM_ATS_IBM_SUPPORTED_IN_KERNEL() || UVM_ATS_IBM_SUPPORTED_IN_DRIVER())

// Maximum number of parallel ATSD register sets per NPU
#define UVM_MAX_ATSD_REGS 16

typedef struct
{
#if UVM_IBM_NPU_SUPPORTED()
    // These are the active NPUs in this VA space, that is, all NPUs with
    // GPUs that have GPU VA spaces registered in this VA space.
    //
    // If a bit is clear in npu_active_mask then the corresponding entry of
    // npu_ref_counts is 0. If a bit is set then the corresponding entry of
    // npu_ref_counts is greater than 0.
    NvU32 npu_ref_counts[NV_MAX_NPUS];
    DECLARE_BITMAP(npu_active_mask, NV_MAX_NPUS);
#endif

    // Lock protecting npu_ref_counts and npu_active_mask. Invalidations
    // take this lock for read. GPU VA space register and unregister take
    // this lock for write. Since all invalidations take the lock for read
    // for the duration of the invalidate, taking the lock for write also
    // flushes all invalidates.
    //
    // This is a spinlock because the invalidation code paths may be called
    // with interrupts disabled, so those paths can't take the VA space
    // lock. We could use a normal exclusive spinlock instead, but a reader/
    // writer lock is preferred to allow concurrent invalidates in the same
    // VA space.
    uvm_rwlock_irqsave_t rwlock;
} uvm_ibm_va_space_t;

typedef struct
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    struct npu_context *npu_context;
#endif

    // Used on the teardown path to know what to clean up. npu_context acts
    // as the equivalent flag for kernel-provided support.
    bool did_ibm_driver_init;
} uvm_ibm_gpu_va_space_t;

struct uvm_ibm_npu_struct
{
    // Number of retained GPUs under this NPU. The other fields in this struct
    // are only valid if this is non-zero.
    unsigned int num_retained_gpus;

    // PCI domain containing this NPU. This acts as a unique system-wide ID for
    // this UVM NPU.
    int pci_domain;

    // The ATS-related fields are only valid when ATS support is enabled and
    // UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1.
    struct
    {
        // Mapped addresses of the ATSD trigger registers. There may be more
        // than one set of identical registers per NPU to enable concurrent
        // invalidates.
        //
        // These will not be accessed unless there is a GPU VA space registered
        // on a GPU under this NPU. They are protected by bit locks in the locks
        // field.
        __be64 __iomem *io_addrs[UVM_MAX_ATSD_REGS];

        // Actual number of registers in the io_addrs array
        size_t count;

        // Bitmask for allocation and locking of the registers. Bit index n
        // corresponds to io_addrs[n]. A set bit means that index is in use
        // (locked).
        DECLARE_BITMAP(locks, UVM_MAX_ATSD_REGS);

        // Max value of any uvm_parent_gpu_t::num_hshub_tlb_invalidate_membars
        // for all retained GPUs under this NPU.
        NvU32 num_membars;
    } atsd_regs;
};

#if UVM_IBM_NPU_SUPPORTED()
    NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu);
    void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu);
#else
    static NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
    {
        return NV_OK;
    }

    static void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
    {

    }
#endif // UVM_IBM_NPU_SUPPORTED

#if UVM_ATS_IBM_SUPPORTED()
    // Initializes IBM specific GPU state.
    //
    // LOCKING: None
    void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space);

    // Enables ATS access for the gpu_va_space on the mm_struct associated with
    // the VA space (va_space_mm).
    //
    // If UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1, NV_ERR_NOT_SUPPORTED is
    // returned if current->mm does not match va_space_mm.mm or if a GPU VA
    // space within another VA space has already called this function on the
    // same mm.
    //
    // If UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1 there are no such restrictions.
    //
    // LOCKING: The VA space lock must be held in write mode.
    //          current->mm->mmap_lock must be held in write mode iff
    //          UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
    NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

    // Disables ATS access for the gpu_va_space. Prior to calling this function,
    // the caller must guarantee that the GPU will no longer make any ATS
    // accesses in this GPU VA space, and that no ATS fault handling for this
    // GPU will be attempted.
    //
    // LOCKING: This function may block on mmap_lock and the VA space lock, so
    //          neither must be held.
    void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

    // Synchronously invalidate ATS translations cached by GPU TLBs. The
    // invalidate applies to all GPUs with active GPU VA spaces in va_space, and
    // covers all pages touching any part of the given range. end is inclusive.
    //
    // GMMU translations in the given range are not guaranteed to be
    // invalidated.
    //
    // LOCKING: No locks are required, but this function may be called with
    //          interrupts disabled.
    void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#else
    static void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
    {

    }

    static NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    {
        return NV_OK;
    }

    static void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    {

    }

    static void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
    {

    }
#endif // UVM_ATS_IBM_SUPPORTED

static NV_STATUS uvm_ats_ibm_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
    return NV_OK;
}

static void uvm_ats_ibm_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{

}

#endif // __UVM_ATS_IBM_H__

@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
                   (sizeof(dummy->fault_source.utlb_id) * 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Blackwell covers 64 PB and that's the minimum
@@ -83,10 +81,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -106,4 +100,15 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->no_ats_range_required = true;

    parent_gpu->conf_computing.per_channel_key_rotation = true;

    // TODO: Bug 5023085: this should be queried from RM instead of determined
    // by UVM.
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
        parent_gpu->rm_info.gpuImplementation ==
        NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B)
        parent_gpu->is_integrated_gpu = true;
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
        parent_gpu->rm_info.gpuImplementation ==
        NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
        parent_gpu->is_integrated_gpu = true;
}

@@ -39,6 +39,7 @@
#include "hwref/blackwell/gb100/dev_fault.h"

static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
static uvm_mmu_mode_hal_t blackwell_integrated_mmu_mode_hal;

static NvU32 page_table_depth_blackwell(NvU64 page_size)
{
@@ -59,35 +60,71 @@ static NvU64 page_sizes_blackwell(void)
    return UVM_PAGE_SIZE_256G | UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static NvU64 page_sizes_blackwell_integrated(void)
{
    return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static uvm_mmu_mode_hal_t *__uvm_hal_mmu_mode_blackwell(uvm_mmu_mode_hal_t *mmu_mode_hal,
                                                        NvU64 big_page_size)
{
    uvm_mmu_mode_hal_t *hopper_mmu_mode_hal;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
    UVM_ASSERT(hopper_mmu_mode_hal);

    // The assumption made is that arch_hal->mmu_mode_hal() will be called
    // under the global lock the first time, so check it here.
    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    *mmu_mode_hal = *hopper_mmu_mode_hal;
    mmu_mode_hal->page_table_depth = page_table_depth_blackwell;

    return mmu_mode_hal;
}

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size)
{
    static bool initialized = false;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
        UVM_ASSERT(hopper_mmu_mode_hal);

        // The assumption made is that arch_hal->mmu_mode_hal() will be called
        // under the global lock the first time, so check it here.
        uvm_assert_mutex_locked(&g_uvm_global.global_lock);

        blackwell_mmu_mode_hal = *hopper_mmu_mode_hal;
        blackwell_mmu_mode_hal.page_table_depth = page_table_depth_blackwell;
        blackwell_mmu_mode_hal.page_sizes = page_sizes_blackwell;
        uvm_mmu_mode_hal_t *mmu_mode_hal;

        mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_mmu_mode_hal, big_page_size);
        mmu_mode_hal->page_sizes = page_sizes_blackwell;
        initialized = true;
    }

    return &blackwell_mmu_mode_hal;
}

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size)
{
    static bool initialized = false;

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *mmu_mode_hal;

        mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_integrated_mmu_mode_hal, big_page_size);
        mmu_mode_hal->page_sizes = page_sizes_blackwell_integrated;
        initialized = true;
    }

    return &blackwell_integrated_mmu_mode_hal;
}
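
The refactor above centralizes a copy-then-override initialization: each Blackwell HAL starts as a copy of the Hopper vtable, has selected entries replaced, and is filled in lazily under the global lock. A minimal standalone sketch of that pattern, with hypothetical types and functions (locking omitted):

#include <stdbool.h>

// Hypothetical operations table, standing in for uvm_mmu_mode_hal_t.
struct ops {
    unsigned long (*page_sizes)(void);
    unsigned (*depth)(unsigned long page_size);
};

static unsigned long base_page_sizes(void) { return 0x3; }
static unsigned base_depth(unsigned long page_size) { (void)page_size; return 4; }
static const struct ops base_ops = { base_page_sizes, base_depth };

static unsigned long derived_page_sizes(void) { return 0x7; }

static struct ops derived_ops; // specialized copy, filled once

static const struct ops *get_derived_ops(void)
{
    static bool initialized = false;

    if (!initialized) {
        derived_ops = base_ops;                      // inherit everything...
        derived_ops.page_sizes = derived_page_sizes; // ...then override a slot
        initialized = true;
    }
    return &derived_ops;
}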

NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    switch (client_id) {

@@ -65,7 +65,10 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
    memset(host_ptr, 0, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
        uvm_rm_mem_type_t type;

        type = gpu->mem_info.size ? UVM_RM_MEM_TYPE_GPU : UVM_RM_MEM_TYPE_SYS;
        status = uvm_rm_mem_alloc(gpu, type, CE_TEST_MEM_SIZE, 0, &mem[i]);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

@@ -405,6 +408,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    uvm_rm_mem_t *sys_rm_mem = NULL;
    uvm_rm_mem_t *gpu_rm_mem = NULL;
    uvm_gpu_address_t gpu_addresses[4] = {0};
    size_t gpu_addresses_length = 0;
    size_t size = gpu->big_page.internal_size;
    static const size_t element_sizes[] = {1, 4, 8};
    const size_t iterations = 4;
@@ -435,7 +439,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)

    // Virtual address (in UVM's internal address space) backed by sysmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
    gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
    gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);

    if (g_uvm_global.conf_computing_enabled) {
        for (i = 0; i < iterations; ++i) {
@@ -472,21 +476,23 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    // Physical address in sysmem
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
    gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
    gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);

    // Physical address in vidmem
    mem_params.backing_gpu = gpu;
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
    gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
    if (gpu->mem_info.size > 0) {
        // Physical address in vidmem
        mem_params.backing_gpu = gpu;
        TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
        gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

    // Virtual address (in UVM's internal address space) backed by vidmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
    gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
        // Virtual address (in UVM's internal address space) backed by vidmem
        TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
        gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
    }

    for (i = 0; i < iterations; ++i) {
        for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
            for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
        for (j = 0; j < gpu_addresses_length; ++j) {
            for (k = 0; k < gpu_addresses_length; ++k) {
                for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
                    TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
                                                                    gpu_addresses[k],

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
    uvm_channel_manager_t *manager = pool->manager;
    uvm_gpu_t *gpu = manager->gpu;
    uvm_gpu_id_t id;

    if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
        return true;

    for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
        if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
    uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
    for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
        if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
            uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
            return true;
        }
    }

    uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

    return false;
}
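
The change above adds peer_gpu_lock around the peer-mask walk; note that every early return inside the critical section must drop the lock first. A minimal standalone sketch of the same discipline, using pthreads and hypothetical data:

#include <pthread.h>
#include <stdbool.h>

#define N_PEERS 8

static pthread_mutex_t peer_lock = PTHREAD_MUTEX_INITIALIZER;
static int peer_table[N_PEERS]; // protected by peer_lock

// Returns true if 'value' appears in the table. Every exit path from the
// critical section unlocks first, mirroring the early unlock-then-return in
// uvm_channel_pool_is_p2p() above.
static bool peer_table_contains(int value)
{
    pthread_mutex_lock(&peer_lock);

    for (int i = 0; i < N_PEERS; i++) {
        if (peer_table[i] == value) {
            pthread_mutex_unlock(&peer_lock);
            return true;
        }
    }

    pthread_mutex_unlock(&peer_lock);
    return false;
}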
|
||||
|
||||
@@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *gpu = channel_manager->gpu;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
@@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
|
||||
// Use bitmap to track which were suspended.
|
||||
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
|
||||
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
|
||||
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
|
||||
status = channel_pool_suspend_p2p(pool);
|
||||
@@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
|
||||
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_gpu_t *gpu = channel_manager->gpu;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
@@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
|
||||
// Use bitmap to track which were suspended.
|
||||
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
|
||||
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
|
||||
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
|
||||
channel_pool_resume_p2p(pool);
|
||||
@@ -2889,6 +2901,13 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
|
||||
static bool ce_is_usable(const UvmGpuCopyEngineCaps *cap)
|
||||
{
|
||||
// When Confidential Computing is enabled, all Copy Engines must support
|
||||
// encryption / decryption, tracked by 'secure' flag. This holds even for
|
||||
// non-CPU-GPU transactions because each channel has an associate semaphore,
|
||||
// and semaphore release must be observable by all processing units.
|
||||
if (g_uvm_global.conf_computing_enabled && !cap->secure)
|
||||
return false;
|
||||
|
||||
return cap->supported && !cap->grce;
|
||||
}
|
||||
|
||||
@@ -3243,9 +3262,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
|
||||
|
||||
if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
|
||||
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
|
||||
uvm_channel_num_gpfifo_entries,
|
||||
manager->conf.num_gpfifo_entries);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
|
||||
uvm_channel_num_gpfifo_entries,
|
||||
manager->conf.num_gpfifo_entries);
|
||||
}
|
||||
|
||||
// 2- Allocation locations
|
||||
@@ -3285,9 +3304,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
|
||||
if (!is_string_valid_location(pushbuffer_loc_value)) {
|
||||
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
|
||||
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
|
||||
uvm_channel_pushbuffer_loc,
|
||||
pushbuffer_loc_value);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
|
||||
uvm_channel_pushbuffer_loc,
|
||||
pushbuffer_loc_value);
|
||||
}
|
||||
|
||||
// Override the default value if requested by the user
|
||||
@@ -3297,8 +3316,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
// so force the location to sys for now.
|
||||
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
|
||||
if (NVCPU_IS_AARCH64) {
|
||||
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
|
||||
pushbuffer_loc_value);
|
||||
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
|
||||
pushbuffer_loc_value);
|
||||
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
|
||||
}
|
||||
else {
|
||||
@@ -3310,8 +3329,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
// Only support the knobs for GPFIFO/GPPut on Volta+
|
||||
if (!gpu->parent->gpfifo_in_vidmem_supported) {
|
||||
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
|
||||
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
|
||||
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
|
||||
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
|
||||
"instead\n",
|
||||
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
|
||||
}
|
||||
|
||||
manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
|
||||
@@ -3323,17 +3343,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
gpfifo_loc_value = uvm_channel_gpfifo_loc;
|
||||
if (!is_string_valid_location(gpfifo_loc_value)) {
|
||||
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
|
||||
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpfifo_loc,
|
||||
gpfifo_loc_value);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpfifo_loc,
|
||||
gpfifo_loc_value);
|
||||
}
|
||||
|
||||
gpput_loc_value = uvm_channel_gpput_loc;
|
||||
if (!is_string_valid_location(gpput_loc_value)) {
|
||||
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
|
||||
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpput_loc,
|
||||
gpput_loc_value);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpput_loc,
|
||||
gpput_loc_value);
|
||||
}
|
||||
|
||||
// On coherent platforms where the GPU does not cache sysmem but the CPU
@@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);

// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
    func(prefix "%s:%u %s[pid:%d]" fmt, \
         kbasename(__FILE__), \
@@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
         current->pid, \
         ##__VA_ARGS__)

// Short prefix - typically for information.
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
    func(prefix fmt, ##__VA_ARGS__)

// No prefix - used by kernel panic messages.
#define UVM_PRINT_FUNC(func, fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)

// Check whether UVM_{ERR,DBG,INFO}_PRINT* should be enabled
// Check whether UVM_{ERR,DBG}_PRINT* should be enabled.
bool uvm_debug_prints_enabled(void);

// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
    } \
} while (0)

#define UVM_ASSERT_PRINT(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ASSERT_PRINT_RL(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ERR_PRINT(fmt, ...) \
@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_DBG_PRINT_RL(fmt, ...) \
#define UVM_DBG_PRINT_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

// UVM_INFO_PRINT prints in all modes (including in release mode). It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
    UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
    UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)

#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
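For orientation, a hedged sketch of what the two prefix flavors expand to; the exact prefix string and the field order of the long prefix are assumptions pieced together from the fragments above, not verified output:

// Short prefix (UVM_INFO_PRINT): module prefix only, printed in all builds.
UVM_INFO_PRINT("driver loaded\n");
//   ~> printk(KERN_INFO "nvidia-uvm:  driver loaded\n")

// Long prefix (UVM_ERR_PRINT, UVM_DBG_PRINT): file, line, function and pid,
// and only when uvm_debug_prints_enabled() returns true.
UVM_ERR_PRINT("bad status %d\n", -22);
//   ~> printk(KERN_ERR "nvidia-uvm:  uvm_gpu.c:42 add_gpu[pid:1234] bad status -22\n")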
@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);

#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
    do { \
        if (unlikely(!(expr))) { \
            UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            dump_stack(); \
            on_uvm_assert(); \
        } \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
    do { \
        if (unlikely(!(expr))) { \
            UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            dump_stack(); \
            on_uvm_assert(); \
        } \
    } while (0)

// Prevent function calls in expr and the print argument list from being
@@ -151,7 +160,8 @@ void on_uvm_assert(void);
        UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
    } while (0)

// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
    do { \
        if (uvm_release_asserts && unlikely(!(expr))) { \
            UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
            if (uvm_release_asserts_dump_stack) \
                dump_stack(); \
            on_uvm_assert(); \
        } \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
    do { \
        if (uvm_release_asserts && unlikely(!(expr))) { \
            UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
            if (uvm_release_asserts_dump_stack) \
                dump_stack(); \
            on_uvm_assert(); \
        } \
    } while (0)

#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
@@ -240,15 +250,6 @@ static inline NvBool uvm_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_star
    return a_end >= b_start && b_end >= a_start;
}

static int debug_mode(void)
{
#ifdef DEBUG
    return 1;
#else
    return 0;
#endif
}

static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
{
    if (ppCache)
@@ -326,22 +327,6 @@ typedef struct
    NvHandle user_object;
} uvm_rm_user_object_t;

typedef enum
{
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_MM,
    UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails due when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 2
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

// Macro used to compare two values for types that support less than operator.
// It returns -1 if a < b, 1 if a > b and 0 if a == b
#define UVM_CMP_DEFAULT(a,b) \
@@ -364,37 +349,13 @@ typedef enum
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);

// Returns the type of data filp->private_data contains to and if ptr_val !=
// NULL returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type
// matches, otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
    char *mapping;

    UVM_ASSERT(page);

    mapping = (char *) kmap(page);
    (void)READ_ONCE(*mapping);
    kunmap(page);
}
// Like uvm_file_is_nvidia_uvm(), but further requires that the input file
// represent a UVM VA space (has fd type UVM_FD_VA_SPACE).
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp);

// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);

static bool uvm_platform_uses_canonical_form_address(void)
{
    if (NVCPU_IS_PPC64LE)
        return false;

    return true;
}

// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
static NvU32 uvm_cpu_num_va_bits(void)
{
@@ -410,7 +371,7 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *

    // Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
    // even when plugged into platforms using it.
    if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
    if (num_va_bits > 40) {
        *first = 1ULL << (num_va_bits - 1);
        *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
    }
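To make the sign-extension arithmetic above concrete, a worked example (the numbers are mine, not from the source):

// For num_va_bits == 48, uvm_get_unaddressable_range() yields:
//   *first = 1ULL << 47                          == 0x0000800000000000
//   *outer = (NvU64)((NvS64)(1ULL << 63) >> 16)  == 0xFFFF800000000000
// so the non-canonical hole [0x0000800000000000, 0xFFFF800000000000) sits
// between the top of the lower canonical half and the bottom of the
// sign-extended upper half.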
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2024 NVIDIA Corporation
    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -708,7 +708,12 @@ void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    return gpu->channel_manager->conf_computing.key_rotation_enabled;
    UVM_ASSERT(gpu);

    // If the channel_manager is not set, we're in channel manager destroy
    // path after the pointer was NULL-ed. Chances are that other key rotation
    // infrastructure is not available either. Disallow the key rotation.
    return gpu->channel_manager && gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
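The key-rotation hunk above is a teardown-hardening change: the accessor must tolerate gpu->channel_manager having already been NULL-ed. A tiny sketch of that defensive-accessor pattern, with hypothetical names (widget, manager, feature_enabled):

// Sketch: never dereference an owner pointer that teardown may have cleared.
static bool widget_feature_enabled(struct widget *w)
{
    UVM_ASSERT(w);

    // During destroy, w->manager is NULL-ed before the last readers drain,
    // so report the feature as disabled instead of crashing.
    return w->manager && w->manager->feature_enabled;
}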
114
kernel-open/nvidia-uvm/uvm_fd_type.c
Normal file
@@ -0,0 +1,114 @@
/*******************************************************************************
    Copyright (c) 2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_fd_type.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_va_space.h"
#include "uvm_test_file.h"

uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
{
    unsigned long uptr;
    uvm_fd_type_t type;
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);

    switch (type) {
        case UVM_FD_UNINITIALIZED:
        case UVM_FD_INITIALIZING:
            UVM_ASSERT(!ptr);
            break;

        case UVM_FD_VA_SPACE:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_MM:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_TEST:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_test_file_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        default:
            UVM_ASSERT(0);
    }

    if (ptr_val)
        *ptr_val = ptr;

    return type;
}

void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    if (uvm_fd_type(filp, &ptr) == type)
        return ptr;
    else
        return NULL;
}

uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp)
{
    long old = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);

    return (uvm_fd_type_t)(old & UVM_FD_TYPE_MASK);
}

NV_STATUS uvm_fd_type_init(struct file *filp)
{
    uvm_fd_type_t old = uvm_fd_type_init_cas(filp);

    if (old != UVM_FD_UNINITIALIZED)
        return NV_ERR_IN_USE;

    return NV_OK;
}

void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr)
{
    void *tmp_ptr;

    UVM_ASSERT(uvm_fd_type(filp, &tmp_ptr) == UVM_FD_INITIALIZING);
    UVM_ASSERT(!tmp_ptr);

    if (type == UVM_FD_UNINITIALIZED)
        UVM_ASSERT(!ptr);

    UVM_ASSERT(((uintptr_t)ptr & type) == 0);

    atomic_long_set_release((atomic_long_t *)&filp->private_data, (uintptr_t)ptr | type);
}
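uvm_fd_type.c keeps a small type tag in the low alignment bits of filp->private_data, which is why every pointed-to type must be aligned to at least 1 << UVM_FD_TYPE_BITS. A self-contained userspace sketch of the same tagged-pointer trick (the names are mine, not the driver's):

#include <assert.h>
#include <stdint.h>

#define TAG_BITS 3
#define TAG_MASK ((1UL << TAG_BITS) - 1UL)

// Pack a small tag into the low bits of a sufficiently aligned pointer.
static uintptr_t tag_pack(void *ptr, unsigned tag)
{
    assert(tag <= TAG_MASK);
    assert(((uintptr_t)ptr & TAG_MASK) == 0); // alignment provides free bits
    return (uintptr_t)ptr | tag;
}

static void *tag_ptr(uintptr_t packed)
{
    return (void *)(packed & ~(uintptr_t)TAG_MASK);
}

static unsigned tag_value(uintptr_t packed)
{
    return (unsigned)(packed & TAG_MASK);
}

The driver additionally reads the packed word with acquire semantics and writes it with release semantics, so a reader that observes the tag also observes the fully initialized object it points to.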
69
kernel-open/nvidia-uvm/uvm_fd_type.h
Normal file
@@ -0,0 +1,69 @@
/*******************************************************************************
    Copyright (c) 2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_FD_TYPE_H__
#define __UVM_FD_TYPE_H__

#include "nvstatus.h"

typedef enum
{
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_MM,
    UVM_FD_TEST,
    UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 3
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

struct file;

// Returns the type of data filp->private_data contains and if ptr_val != NULL
// returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type matches,
// otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Does atomic CAS on filp->private_data, expecting UVM_FD_UNINITIALIZED and
// swapping in UVM_FD_INITIALIZING. Returns the old type regardless of CAS
// success.
uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp);

// Like uvm_fd_type_init_cas() but returns NV_OK on CAS success and
// NV_ERR_IN_USE on CAS failure.
NV_STATUS uvm_fd_type_init(struct file *filp);

// Assigns {type, ptr} to filp. filp's current type must be UVM_FD_INITIALIZING.
// If the new type is UVM_FD_UNINITIALIZED, ptr must be NULL.
void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr);

#endif // __UVM_FD_TYPE_H__
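A hedged sketch of how an open path might drive this API end to end; uvm_open_sketch() and create_va_space_sketch() are hypothetical, the real call sites live elsewhere in the driver:

static NV_STATUS uvm_open_sketch(struct file *filp)
{
    uvm_va_space_t *va_space;

    // CAS UVM_FD_UNINITIALIZED -> UVM_FD_INITIALIZING to claim the fd.
    NV_STATUS status = uvm_fd_type_init(filp);
    if (status != NV_OK)
        return status; // another thread is already initializing this fd

    va_space = create_va_space_sketch(); // hypothetical allocation
    if (!va_space) {
        // Roll the fd back so a later attempt can claim it again.
        uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
        return NV_ERR_NO_MEMORY;
    }

    // Release-store publishes the object together with its type tag.
    uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);
    return NV_OK;
}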
@@ -93,13 +93,12 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;

typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;

typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;

typedef struct uvm_reverse_map_struct uvm_reverse_map_t;

typedef struct uvm_ibm_npu_struct uvm_ibm_npu_t;
#endif //__UVM_FORWARD_DECL_H__

@@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

    status = uvm_access_counters_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
        goto error;
    }

    // This sets up the ISR (interrupt service routine), by hooking into RM's
    // top-half ISR callback. As soon as this call completes, GPU interrupts
    // will start arriving, so it's important to be prepared to receive
@@ -224,8 +230,8 @@ void uvm_global_exit(void)
    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
    uvm_access_counters_exit();
    uvm_service_block_context_exit();

    uvm_perf_heuristics_exit();
    uvm_perf_events_exit();
    uvm_migrate_exit();
@@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
    // * Flush relevant kthread queues (bottom half, etc.)

    // Some locks acquired by this function, such as pm.lock, are released
    // by uvm_resume(). This is contrary to the lock tracking code's
    // by uvm_resume(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
        gpu = uvm_gpu_get(gpu_id);

        // Since fault buffer state may be lost across sleep cycles, UVM must
        // ensure any outstanding replayable faults are dismissed. The RM
        // ensure any outstanding replayable faults are dismissed. The RM
        // guarantees that all user channels have been preempted before
        // uvm_suspend() is called, which implies that no user channels can be
        // stalled on faults when this point is reached.
@@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
    }

    // Acquire each VA space's lock in write mode to lock out VMA open and
    // release callbacks. These entry points do not have feasible early exit
    // release callbacks. These entry points do not have feasible early exit
    // options, and so aren't suitable for synchronization with pm.lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

@@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
    g_uvm_global.pm.is_suspended = false;

    // Some locks released by this function, such as pm.lock, were acquired
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
    uvm_thread_context_lock_enable_tracking();

    // Force completion of any release callbacks successfully queued for
    // deferred completion while suspended. The deferred release
    // deferred completion while suspended. The deferred release
    // queue is not guaranteed to remain empty following this flush since
    // some threads that failed to acquire pm.lock in uvm_release() may
    // not have scheduled their handlers yet.
@@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
    }
    else {
        UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
                      nvstatusToString(error), nvstatusToString(previous_error));
                      nvstatusToString(error),
                      nvstatusToString(previous_error));
    }

    nvUvmInterfaceReportFatalError(error);

@@ -31,7 +31,6 @@
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"

// Global state of the uvm driver
struct uvm_global_struct
@@ -124,12 +123,6 @@ struct uvm_global_struct
        bool enabled;
    } ats;

#if UVM_IBM_NPU_SUPPORTED()
    // On IBM systems this array tracks the active NPUs (the NPUs which are
    // attached to retained GPUs).
    uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

    // List of all active VA spaces
    struct
    {

@@ -149,7 +149,7 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
}

// Return a PASID to use with the internal address space (AS), or -1 if not
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// not used in address translation requests, which only translate GPA->SPA.
// The buffer management thus remains the same: DMA mapped GPA addresses can
// be accessed by the GPU, while unmapped addresses can not and any access is
@@ -358,10 +358,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
        addr_shift = gpu_addr_shift;

    // Pascal+ GPUs are capable of accessing kernel pointers in various modes
    // by applying the same upper-bit checks that x86, ARM, and Power
    // processors do. x86 and ARM use canonical form addresses. For ARM, even
    // by applying the same upper-bit checks that x86 or ARM CPU processors do.
    // The x86 and ARM platforms use canonical form addresses. For ARM, even
    // with Top-Byte Ignore enabled, the following logic validates addresses
    // from the kernel VA range. PowerPC does not use canonical form address.
    // from the kernel VA range.
    //
    // The following diagram illustrates the valid (V) VA regions that can be
    // mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
    // (C) regions are only accessible by the CPU. Similarly, (G) regions
@@ -389,8 +390,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    // |VVVVVVVVVVVVVVVV|      |VVVVVVVVVVVVVVVV|
    // 0 +----------------+    0 +----------------+

    // On canonical form address platforms and Pascal+ GPUs.
    if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
    // On Pascal+ GPUs.
    if (gpu_addr_shift > 40) {
        // On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
        // 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
        // wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
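As a worked example of the shift selection above (a sketch, not the full function): with a 5-level-paging x86 CPU (57 bits) and a Pascal GPU (49 bits), addr_shift becomes 48, so an address is GPU-addressable only when bits 48..63 are a sign extension of bit 47:

// Sketch: canonical-form validity check for a chosen addr_shift.
static bool is_canonical(NvU64 addr, unsigned addr_shift)
{
    NvU64 ext = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
    return ext == addr;
}
// is_canonical(0x0000700000000000ULL, 48) -> true  (lower half)
// is_canonical(0xFFFF800000000000ULL, 48) -> true  (upper half)
// is_canonical(0x0001000000000000ULL, 48) -> false (inside the hole)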
@@ -431,30 +432,28 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
    NvU8 addr_shift;
    NvU64 input_addr = addr;

    if (uvm_platform_uses_canonical_form_address()) {
        // When the CPU VA width is larger than GPU's, it means that:
        // On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
        // On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
        // We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
        // behavior of CPUs with smaller (than GPU) VA widths.
        gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
        cpu_addr_shift = uvm_cpu_num_va_bits();
    // When the CPU VA width is larger than GPU's, it means that:
    // On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
    // On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
    // We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
    // behavior of CPUs with smaller (than GPU) VA widths.
    gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
    cpu_addr_shift = uvm_cpu_num_va_bits();

        if (cpu_addr_shift > gpu_addr_shift)
            addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
        else if (gpu_addr_shift == 57)
            addr_shift = gpu_addr_shift;
        else
            addr_shift = cpu_addr_shift;
    if (cpu_addr_shift > gpu_addr_shift)
        addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
    else if (gpu_addr_shift == 57)
        addr_shift = gpu_addr_shift;
    else
        addr_shift = cpu_addr_shift;

        addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
    addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));

        // This protection acts on when the address is not covered by the GPU's
        // OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
        // permissive (NO_CHECK) mode.
        if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
            return input_addr;
    }
    // This protection acts on when the address is not covered by the GPU's
    // OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
    // permissive (NO_CHECK) mode.
    if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
        return input_addr;

    return addr;
}
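A numeric walk-through of the function above under one assumed configuration (addr_shift == 48, gpu_addr_shift == 49, e.g. 5-level x86 with a pre-Hopper GPU):

NvU64 input = 0x0000F00000000000ULL;               // bit 47 set, bit 48 clear
NvU64 addr  = (NvU64)((NvS64)(input << 16) >> 16); // -> 0xFFFFF00000000000
// The final guard compares the low 49 bits of addr and input. Here they
// differ (sign extension flipped bit 48), so the function returns the
// original input rather than an address outside the GPU's OOR_ADDR_CHECK.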
@@ -485,7 +484,7 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
            continue;

        UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
                             "nvlink p2p %u p2p %u\n",
                             "nvlink p2p %u p2p %u secure %u\n",
                             i,
                             ce_caps->cePceMask,
                             ce_caps->grce,
@@ -494,7 +493,8 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
                             ce_caps->sysmemWrite,
                             ce_caps->sysmem,
                             ce_caps->nvlinkP2p,
                             ce_caps->p2p);
                             ce_caps->p2p,
                             ce_caps->secure);
    }

out:
@@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
    NvU64 num_pages_in;
    NvU64 num_pages_out;
    NvU64 mapped_cpu_pages_size;
    NvU32 get, put;
    NvU32 get;
    NvU32 put;
    NvU32 i;
    unsigned int cpu;

    UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
@@ -593,9 +595,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                             window_size / (1024 * 1024));
    }

    if (gpu->parent->npu)
        UVM_SEQ_OR_DBG_PRINT(s, "npu_domain %d\n", gpu->parent->npu->pci_domain);

    UVM_SEQ_OR_DBG_PRINT(s, "interrupts %llu\n", gpu->parent->isr.interrupt_count);

    if (gpu->parent->isr.replayable_faults.handling) {
@@ -608,19 +607,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                                 gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
        }
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
                             gpu->parent->fault_buffer_info.replayable.max_faults);
                             gpu->parent->fault_buffer.replayable.max_faults);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
                             gpu->parent->fault_buffer_info.replayable.cached_get);
                             gpu->parent->fault_buffer.replayable.cached_get);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
                             gpu->parent->fault_buffer_info.replayable.cached_put);
                             gpu->parent->fault_buffer.replayable.cached_put);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
                             gpu->parent->fault_buffer_hal->read_get(gpu->parent));
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
                             gpu->parent->fault_buffer_hal->read_put(gpu->parent));
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
                             gpu->parent->fault_buffer_info.max_batch_size);
                             gpu->parent->fault_buffer.max_batch_size);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
                             uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
                             uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
                             gpu->parent->stats.num_replayable_faults);
    }
@@ -634,32 +633,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                                 gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
        }
        UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
                             gpu->parent->fault_buffer_info.non_replayable.max_faults);
                             gpu->parent->fault_buffer.non_replayable.max_faults);
        UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
                             gpu->parent->stats.num_non_replayable_faults);
    }

    if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
                             gpu->parent->isr.access_counters.stats.bottom_half_count);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
        for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
            UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
                                 cpu,
                                 gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
    for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
        if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
            UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
                                 gpu->parent->isr.access_counters[i].stats.bottom_half_count);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
            for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
                UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
                                     cpu,
                                     gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
            }
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
                                 gpu->parent->access_counter_buffer[i].max_notifications);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
                                 gpu->parent->access_counter_buffer[i].cached_get);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
                                 gpu->parent->access_counter_buffer[i].cached_put);

            get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
            put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);

            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
        }
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
                             gpu->parent->access_counter_buffer_info.max_notifications);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
                             gpu->parent->access_counter_buffer_info.cached_get);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
                             gpu->parent->access_counter_buffer_info.cached_put);

        get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
        put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);

        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
    }

    num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
@@ -694,18 +696,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)

    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_read_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_write_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
                         parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
    UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                         (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -713,25 +715,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
                         (num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
    UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_replays);
                         parent_gpu->fault_buffer.replayable.stats.num_replays);
    UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
                         parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
    UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
                         parent_gpu->stats.num_non_replayable_faults -
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
                         parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
    UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                         (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -743,16 +745,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
{
    NvU64 num_pages_in;
    NvU64 num_pages_out;
    NvU32 i;

    UVM_ASSERT(uvm_procfs_is_debug_enabled());

    num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
    UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                         (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
                         (num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
    // procfs_files are created before gpu_init_isr, we need to check if the
    // access_counter_buffer is allocated.
    if (parent_gpu->access_counter_buffer) {
        for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
            uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];

            num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
            num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
            UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
            UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                                 (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
            UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
                                 (num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
        }
    }
}
// This function converts an index of 2D array of size [N x N] into an index
@@ -892,7 +903,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
    uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;
        return -EAGAIN;

    gpu_info_print_common(gpu, s);

@@ -911,7 +922,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;
        return -EAGAIN;

    gpu_fault_stats_print_common(parent_gpu, s);

@@ -930,7 +941,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;
        return -EAGAIN;

    gpu_access_counters_print_common(parent_gpu, s);

@@ -1027,7 +1038,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
{
    struct proc_dir_entry *gpu_base_dir_entry;
    char symlink_name[16]; // Hold a uvm_gpu_id_t value in decimal.
    char uuid_buffer[max(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
    char uuid_buffer[NV_MAX(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
    char gpu_dir_name[sizeof(symlink_name) + sizeof(uuid_buffer) + 1];

    if (!uvm_procfs_is_enabled())
@@ -1182,7 +1193,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
    uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
    uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
    uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
    uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
    uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1221,7 +1232,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)

    // Initialize enough of the gpu struct for remove_gpu to be called
    gpu->magic = UVM_GPU_MAGIC_VALUE;
    uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
    uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);

    sub_processor_index = uvm_id_sub_processor_index(gpu_id);
    parent_gpu->gpus[sub_processor_index] = gpu;
@@ -1235,13 +1246,15 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
    NvU32 num_entries;
    NvU64 va_size;
    NvU64 va_per_entry;
    NvU64 physical_address;
    NvU64 dma_address;
    uvm_mmu_page_table_alloc_t *tree_alloc;

    status = uvm_page_tree_init(gpu,
                                NULL,
                                UVM_PAGE_TREE_TYPE_KERNEL,
                                gpu->big_page.internal_size,
                                uvm_get_page_tree_location(gpu->parent),
                                uvm_get_page_tree_location(gpu),
                                &gpu->address_space_tree);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Initializing the page tree failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1265,12 +1278,17 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
                  gpu->parent->rm_va_size,
                  va_per_entry);

    tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
    tree_alloc = uvm_page_tree_pdb_internal(&gpu->address_space_tree);
    if (tree_alloc->addr.aperture == UVM_APERTURE_VID)
        physical_address = tree_alloc->addr.address;
    else
        physical_address = page_to_phys(tree_alloc->handle.page);
    status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
                                                               tree_alloc->addr.address,
                                                               physical_address,
                                                               num_entries,
                                                               tree_alloc->addr.aperture == UVM_APERTURE_VID,
                                                               gpu_get_internal_pasid(gpu)));
                                                               gpu_get_internal_pasid(gpu),
                                                               &dma_address));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -1278,6 +1296,9 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
        return status;
    }

    if (tree_alloc->addr.aperture == UVM_APERTURE_SYS)
        gpu->address_space_tree.pdb_rm_dma_address = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_address);

    gpu->rm_address_space_moved_to_page_tree = true;

    return NV_OK;
@@ -1390,13 +1411,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
    parent_gpu->egm.enabled = gpu_info->egmEnabled;
    parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
    parent_gpu->egm.base_address = gpu_info->egmBaseAddr;
    parent_gpu->access_counters_supported = (gpu_info->accessCntrBufferCount != 0);

    status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(parent_gpu->rm_device, &fb_info));
    if (status != NV_OK)
        return status;

    parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);

    if (!fb_info.bZeroFb)
        parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;

@@ -1616,7 +1636,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
    // trackers.
    if (sync_replay_tracker) {
        uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
        uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);

        if (status != NV_OK)
@@ -1627,7 +1647,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
    // VA block trackers, too.
    if (sync_clear_faulted_tracker) {
        uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
        uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);

        if (status != NV_OK)
@@ -1635,13 +1655,20 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
    }

    // Sync the access counter clear tracker too.
    if (parent_gpu->access_counters_supported) {
        uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
        status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
        uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
    if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
        NvU32 notif_buf_index;
        for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
            uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];

        if (status != NV_OK)
            UVM_ASSERT(status == uvm_global_get_status());
            if (access_counters->rm_info.accessCntrBufferHandle != 0) {
                uvm_access_counters_isr_lock(access_counters);
                status = uvm_tracker_wait(&access_counters->clear_tracker);
                uvm_access_counters_isr_unlock(access_counters);

                if (status != NV_OK)
                    UVM_ASSERT(status == uvm_global_get_status());
            }
        }
    }
}

@@ -1680,15 +1707,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
    UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));

    // Access counters should have been disabled when the GPU is no longer
    // registered in any VA space.
    UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
    deinit_parent_procfs_files(parent_gpu);

    // Return ownership to RM
    uvm_parent_gpu_deinit_isr(parent_gpu);

    deinit_parent_procfs_files(parent_gpu);

    uvm_pmm_devmem_deinit(parent_gpu);
    uvm_ats_remove_gpu(parent_gpu);

@@ -1794,14 +1817,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    switch (fault_entry->fault_access_type)
    {
        case UVM_FAULT_ACCESS_TYPE_READ:
            ++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
            ++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_WRITE:
            ++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
            ++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
            ++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
            ++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
            break;
        default:
            UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
@@ -1809,7 +1832,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    }

    if (!fault_entry->is_virtual)
        ++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
        ++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;

    ++parent_gpu->stats.num_non_replayable_faults;

@@ -1821,23 +1844,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    switch (fault_entry->fault_access_type)
    {
        case UVM_FAULT_ACCESS_TYPE_PREFETCH:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_READ:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_WRITE:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
            break;
        default:
            break;
    }
    if (is_duplicate || fault_entry->filtered)
        ++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
        ++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;

    ++parent_gpu->stats.num_replayable_faults;
}
@@ -1901,21 +1924,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_

    if (gpu_dst) {
        atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
        if (is_replayable_fault)
            atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
        else if (is_non_replayable_fault)
            atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
        else if (is_access_counter)
            atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
        if (is_replayable_fault) {
            atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
        }
        else if (is_non_replayable_fault) {
            atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
        }
        else if (is_access_counter) {
            NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
            atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
        }
    }
    if (gpu_src) {
        atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
        if (is_replayable_fault)
            atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
        else if (is_non_replayable_fault)
            atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
        else if (is_access_counter)
            atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
        if (is_replayable_fault) {
            atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
        }
        else if (is_non_replayable_fault) {
            atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
        }
        else if (is_access_counter) {
            NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
            atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
        }
    }
}

@@ -1929,8 +1960,9 @@ static void uvm_param_conf(void)
    }
    else {
        if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
            pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
                    uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
            UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
                           uvm_peer_copy,
                           UVM_PARAM_PEER_COPY_PHYSICAL);
        }

        g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
@@ -2397,6 +2429,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
{
    NV_STATUS status;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(peer_caps->ref_count == 0);

    status = parent_peers_retain(gpu0->parent, gpu1->parent);
@@ -2419,25 +2452,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
    UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
    UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);

    // In the case of NVLINK peers, this initialization will happen during
    // add_gpu. As soon as the peer info table is assigned below, the access
    // counter bottom half could start operating on the GPU being newly
    // added and inspecting the peer caps, so all of the appropriate
    // initialization must happen before this point.
    uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);

    uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
    uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
    UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
    gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;

    uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
    uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
    uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);

    uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
    uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
    UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
    gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;

    uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
    uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);

    return NV_OK;
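The ordering comment above captures a standard publication pattern: finish all initialization, then make the object reachable under the lock, because a concurrent bottom half may use it the moment it appears in the table. A sketch with hypothetical names (table, obj, slots):

// Sketch: publish-last. Readers that find o in the table under t->lock
// may rely on every field already being valid.
static void table_publish(struct table *t, struct obj *o, int id)
{
    obj_init_fully(o);            // strictly before publication

    uvm_spin_lock(&t->lock);
    UVM_ASSERT(t->slots[id] == NULL);
    t->slots[id] = o;             // visible to readers from here on
    uvm_spin_unlock(&t->lock);
}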
|
||||
|
||||
@@ -2465,18 +2486,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
|
||||
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
|
||||
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
|
||||
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
|
||||
|
||||
// Flush the access counter buffer to avoid getting stale notifications for
|
||||
// accesses to GPUs to which peer access is being disabled. This is also
|
||||
@@ -2690,7 +2711,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);

// If the parent is being freed, stop scheduling new bottom halves and
// update relevant software state. Else flush any pending bottom halves
// update relevant software state. Else flush any pending bottom halves
// before continuing.
if (free_parent)
uvm_parent_gpu_disable_isr(parent_gpu);
@@ -2713,6 +2734,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
const UvmGpuPlatformInfo *gpu_platform_info,
uvm_parent_gpu_t *parent_gpu,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
@@ -2725,6 +2747,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
if (status != NV_OK)
return status;

if (uvm_enable_builtin_tests)
parent_gpu->test = *parent_gpu_error;
}

gpu = alloc_gpu(parent_gpu, gpu_id);
@@ -2794,7 +2819,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
// Clear the interrupt bit and force the re-evaluation of the interrupt
// condition to ensure that we don't miss any pending interrupt
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}

// Access counters are enabled on demand
@@ -2837,6 +2862,7 @@ error:
// the partition.
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status = NV_OK;
@@ -2888,7 +2914,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;

status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
if (status != NV_OK)
goto error_unregister;
}
@@ -2913,11 +2939,12 @@ error_free_gpu_info:

NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
uvm_mutex_lock(&g_uvm_global.global_lock);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
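The hunks above thread a new parent_gpu_error descriptor from uvm_gpu_retain_by_uuid() through gpu_retain_by_uuid_locked() into add_gpu(), where it is copied into the parent GPU only when built-in tests are enabled. A hedged user-space sketch of that plumbing follows; every name with a _model suffix is invented for illustration and is not a UVM symbol.

/* Sketch: pass an optional error-injection descriptor down a call chain. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
    bool alloc_buffer;
    bool alloc_block_context;
} inject_error_model_t;

typedef struct {
    inject_error_model_t test;   /* copied from the caller-provided descriptor */
} parent_gpu_model_t;

static bool tests_enabled_model = true;

static int add_gpu_model(parent_gpu_model_t *parent, const inject_error_model_t *err)
{
    /* Mirrors "parent_gpu->test = *parent_gpu_error" under uvm_enable_builtin_tests. */
    if (tests_enabled_model && err)
        parent->test = *err;
    return 0;
}

int main(void)
{
    parent_gpu_model_t parent = {0};
    inject_error_model_t err = { .alloc_buffer = true };
    add_gpu_model(&parent, &err);
    printf("inject alloc_buffer: %d\n", parent.test.alloc_buffer);
    return 0;
}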
@@ -3072,118 +3099,81 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return (address.address >= gpu->parent->peer_va_base &&
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
}
} else {
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;

if (uvm_aperture_is_peer(address.aperture)) {
bool is_peer = true;
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;

// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;

// EGM uses peer IDs but they are different from VIDMEM peer IDs.
// Check if the address aperture is an EGM aperture.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
uvm_aperture_t egm_peer_aperture;

if (!parent_peer_gpu->egm.enabled)
continue;

egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);

if (address.aperture == egm_peer_aperture) {
is_peer = false;
break;
}
// EGM uses peer IDs but they are different from VIDMEM peer
// IDs.
// Check if the address aperture is an EGM aperture.
// We should not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
}

uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;

// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);

// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;

if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}

if (address.aperture != UVM_APERTURE_SYS)
return false;

// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);

// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;

if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
}

return false;
}

uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu)
{
// See comment in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) || g_uvm_global.conf_computing_enabled)
// See comments in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent) || g_uvm_global.conf_computing_enabled)
return UVM_APERTURE_VID;

if (!gpu->mem_info.size)
return UVM_APERTURE_SYS;

return UVM_APERTURE_DEFAULT;
}

uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
{
uvm_processor_id_t id = UVM_ID_INVALID;

// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
// is also reported for accesses to remote GPUs connected to a different CPU
// NUMA domain. We will need to determine the actual processor id using the
// reported physical address.
if (addr.aperture == UVM_APERTURE_SYS)
return UVM_ID_CPU;
else if (addr.aperture == UVM_APERTURE_VID)
return gpu->id;

uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);

for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];

UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));

if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
// NVSWITCH connected systems use an extended physical address to
// map to peers. Find the physical memory 'slot' containing the
// given physical address to find the peer gpu that owns the
// physical address
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
other_gpu->mem_info.max_allocatable_address;

if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
fabric_window_end >= addr.address)
break;
}
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
break;
}
}

uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);

return id;
}

static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
{
NvU64 key;
@@ -3570,20 +3560,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare

*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);

uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);

user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
if (!user_channel) {
status = NV_ERR_INVALID_CHANNEL;
goto exit_unlock;
}

if (!user_channel->in_subctx) {
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
UVM_ASSERT_MSG(entry->ve_id == 0,
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
entry->ve_id);

gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
@@ -3591,7 +3580,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_gpu = gpu_va_space->gpu;
}
else {
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
@@ -3638,7 +3627,7 @@ static NvU64 gpu_addr_to_dma_addr(uvm_parent_gpu_t *parent_gpu, NvU64 gpu_addr)
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the DMA address we get from the OS.
static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
{
NvU64 gpu_addr = dma_addr - parent_gpu->dma_addressable_start;
UVM_ASSERT(dma_addr >= gpu_addr);
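The comment above describes the DMA window offset: RM shifts the GPU's view of sysmem by dma_addressable_start, so converting between OS DMA addresses and GPU addresses is a single subtraction or addition. A small self-contained example of that arithmetic, using an arbitrary assumed window base:

/* Worked example of the dma_addr <-> gpu_addr translation. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t dma_addressable_start = 0x0000001000000000ull; /* assumed window base */
    uint64_t dma_addr = 0x0000001000042000ull;              /* from the OS DMA API */

    uint64_t gpu_addr = dma_addr - dma_addressable_start;   /* what the GPU uses */
    assert(dma_addr >= gpu_addr);                           /* mirrors the UVM_ASSERT */

    uint64_t back = gpu_addr + dma_addressable_start;       /* gpu_addr_to_dma_addr */
    assert(back == dma_addr);

    printf("dma 0x%llx -> gpu 0x%llx\n",
           (unsigned long long)dma_addr, (unsigned long long)gpu_addr);
    return 0;
}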
@@ -3646,32 +3635,40 @@ static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
return gpu_addr;
}

void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
static void *parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
{
NvU64 dma_addr;
void *cpu_addr;

cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, &dma_addr, gfp_flags);

if (!cpu_addr)
return cpu_addr;

*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
atomic64_add(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
return cpu_addr;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address)
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
{
void *cpu_addr = parent_gpu_dma_alloc_page(gpu->parent, gfp_flags, dma_address_out);
if (!cpu_addr)
return NV_ERR_NO_MEMORY;

// TODO: Bug 4868590: Issue GPA invalidate here

*cpu_addr_out = cpu_addr;
return NV_OK;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
{
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, va, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, cpu_addr, dma_address);
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}

NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
struct page *page,
size_t size,
NvU64 *dma_address_out)
static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NvU64 dma_addr;

@@ -3694,11 +3691,20 @@ NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
}

atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);

return NV_OK;
}

NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NV_STATUS status = parent_gpu_map_cpu_pages(gpu->parent, page, size, dma_address_out);

// TODO: Bug 4868590: Issue GPA invalidate here

return status;
}

void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
{
UVM_ASSERT(PAGE_ALIGNED(size));

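The allocation and mapping paths above all pair an atomic64_add with a matching atomic64_sub on mapped_cpu_pages_size, which is what the leak check at teardown relies on. A user-space model of that accounting; malloc stands in for dma_alloc_coherent and every name is local to the model:

/* Model of page alloc/free bookkeeping for leak detection. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE_MODEL 4096

static long long mapped_bytes_model;

static void *dma_alloc_page_model(void)
{
    void *p = malloc(PAGE_SIZE_MODEL);
    if (p)
        mapped_bytes_model += PAGE_SIZE_MODEL;  /* atomic64_add in the driver */
    return p;
}

static void dma_free_page_model(void *p)
{
    free(p);
    mapped_bytes_model -= PAGE_SIZE_MODEL;      /* atomic64_sub in the driver */
}

int main(void)
{
    void *page = dma_alloc_page_model();
    if (!page)
        return 1;
    dma_free_page_model(page);
    /* A nonzero value here would indicate a leaked mapping. */
    printf("outstanding bytes at teardown: %lld\n", mapped_bytes_model);
    return 0;
}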
@@ -189,6 +189,9 @@ struct uvm_service_block_context_struct

// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;

// Access counters notification buffer index.
NvU32 access_counters_buffer_index;
};

typedef struct
@@ -197,8 +200,8 @@ typedef struct
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
// region of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;

// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@@ -350,7 +353,7 @@ typedef struct
// entries from the GPU buffer
NvU32 max_batch_size;

struct uvm_replayable_fault_buffer_info_struct
struct uvm_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -414,7 +417,7 @@ typedef struct
uvm_ats_fault_invalidate_t ats_invalidate;
} replayable;

struct uvm_non_replayable_fault_buffer_info_struct
struct uvm_non_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -468,7 +471,7 @@ typedef struct

// Timestamp when prefetch faults were disabled last time
NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
} uvm_fault_buffer_t;

struct uvm_access_counter_service_batch_context_struct
{
@@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct

NvU32 num_cached_notifications;

struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_access_counter_buffer_entry_t **notifications;

NvU32 num_notifications;
NvU32 num_notifications;

// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
} virt;

struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_reverse_map_t *translations;

NvU32 num_notifications;

// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;

// Helper page mask to compute the accessed pages within a VA block
uvm_page_mask_t accessed_pages;
@@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 batch_id;
};

typedef struct
struct uvm_access_counter_buffer_struct
{
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
} rm;
uvm_parent_gpu_t *parent_gpu;

// The following values are precomputed by the access counter notification
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
// uvm_gpu_access_counters.c for more details.
NvU64 translation_size;

NvU64 translations_per_counter;

NvU64 sub_granularity_region_size;

NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;

typedef struct
{
UvmGpuAccessCntrInfo rm_info;

// Access counters may have multiple notification buffers.
NvU32 index;

NvU32 max_notifications;

NvU32 max_batch_size;
@@ -560,10 +531,22 @@ typedef struct
// may override it to try different configuration values.
struct
{
uvm_gpu_access_counter_type_config_t mimc;
uvm_gpu_access_counter_type_config_t momc;
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
} rm;

NvU32 threshold;
// The following values are precomputed by the access counter
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
// in uvm_gpu_access_counters.c for more details.
NvU64 translation_size;

NvU64 sub_granularity_region_size;

NvU64 sub_granularity_regions_per_translation;

NvU32 threshold;
} current_config;

// Access counter statistics
@@ -575,7 +558,7 @@ typedef struct
} stats;

// Ignoring access counters means that notifications are left in the HW
// buffer without being serviced. Requests to ignore access counters
// buffer without being serviced. Requests to ignore access counters
// are counted since the suspend path inhibits access counter interrupts,
// and the resume path needs to know whether to reenable them.
NvU32 notifications_ignored_count;
@@ -583,13 +566,25 @@ typedef struct
// Context structure used to service a GPU access counter batch
uvm_access_counter_service_batch_context_t batch_service_context;

// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes
//
// Locking: both readers and writers must hold the access counters ISR lock
uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
struct
{
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes.
//
// Locking: both readers and writers must hold the access counters ISR
// lock.
uvm_va_space_t *reconfiguration_owner;

// The service access counters loop breaks after processing the first
// batch. It will be retriggered if there are pending notifications, but
// it releases the ISR service lock to check certain races that would be
// difficult to hit otherwise.
bool one_iteration_per_batch;
NvU32 sleep_per_iteration_us;
} test;

};

typedef struct
{
@@ -745,15 +740,11 @@ struct uvm_gpu_struct

struct
{
// Mask of peer_gpus set
// Mask of peer_gpus set.
uvm_processor_mask_t peer_gpu_mask;

// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
// Leaf spinlock used to synchronize access to peer_gpu_mask.
uvm_spinlock_t peer_gpu_lock;
} peer_info;

// Maximum number of subcontexts supported
@@ -957,6 +948,16 @@ struct uvm_gpu_struct
uvm_mutex_t device_p2p_lock;
};

typedef struct
{
bool access_counters_alloc_buffer;
bool access_counters_alloc_block_context;
bool isr_access_counters_alloc;
bool isr_access_counters_alloc_stats_cpu;
bool access_counters_batch_context_notifications;
bool access_counters_batch_context_notification_cache;
} uvm_test_parent_gpu_inject_error_t;

// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
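The uvm_test_parent_gpu_inject_error_t flags defined above are meant to be consulted by init paths (the real checks appear later in uvm_isr_init_access_counters() and uvm_parent_gpu_init_isr()): when built-in tests are enabled and a flag is set, the path returns NV_ERR_NO_MEMORY as if an allocation had failed, so tests can exercise the error-handling code. A hedged user-space model of that pattern:

/* Model: a test-only flag short-circuits an init path with a fake failure. */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    bool alloc_buffer;   /* stands in for access_counters_alloc_buffer */
} inject_model_t;

static bool builtin_tests_model = true;

static int init_buffers_model(const inject_model_t *inject)
{
    /* Simulated NV_ERR_NO_MEMORY before the real allocation would run. */
    if (builtin_tests_model && inject->alloc_buffer)
        return -1;

    /* The real allocation would happen here. */
    return 0;
}

int main(void)
{
    inject_model_t inject = { .alloc_buffer = true };
    printf("init status: %d\n", init_buffers_model(&inject));
    return 0;
}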
@@ -965,8 +966,8 @@ struct uvm_gpu_struct
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// registered, but there are brief periods when they are not registered,
// such as during interrupt handling, and in add_gpu() or remove_gpu().
nv_kref_t gpu_kref;
@@ -976,7 +977,7 @@ struct uvm_parent_gpu_struct

uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];

// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);

@@ -1000,17 +1001,6 @@ struct uvm_parent_gpu_struct
// nvUvmInterfaceUnregisterGpu()).
struct pci_dev *pci_dev;

// NVLINK Processing Unit (NPU) on PowerPC platforms. The NPU is a
// collection of CPU-side PCI devices which bridge GPU NVLINKs and the CPU
// memory bus.
//
// There is one PCI device per NVLINK. A set of NVLINKs connects to a single
// GPU, and all NVLINKs for a given socket are collected logically under
// this UVM NPU because some resources (such as register mappings) are
// shared by all those NVLINKs. This means multiple GPUs may connect to the
// same UVM NPU.
uvm_ibm_npu_t *npu;

// On kernels with NUMA support, this entry contains the closest CPU NUMA
// node to this GPU. Otherwise, the value will be -1.
int closest_cpu_numa_node;
@@ -1033,13 +1023,12 @@ struct uvm_parent_gpu_struct
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the physical address. The DMA mapping helpers like
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
// care of that.
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
NvU64 dma_addressable_start;
NvU64 dma_addressable_limit;

// Total size (in bytes) of physically mapped (with
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
// uvm_gpu_map_cpu_pages) sysmem pages, used for leak detection.
atomic64_t mapped_cpu_pages_size;

// Hardware Abstraction Layer
@@ -1079,9 +1068,9 @@ struct uvm_parent_gpu_struct

bool access_counters_supported;

// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
bool access_counters_can_use_physical_addresses;

bool fault_cancel_va_supported;
@@ -1144,6 +1133,13 @@ struct uvm_parent_gpu_struct
// Indicates whether the GPU can map sysmem with pages larger than 4k
bool can_map_sysmem_with_large_pages;

// An integrated GPU has no vidmem and coherent access to sysmem. Note
// integrated GPUs have a write-back L2 cache (cf. discrete GPUs
// write-through cache.)
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
bool is_integrated_gpu;

struct
{
// If true, the granularity of key rotation is a single channel. If
@@ -1205,17 +1201,17 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;

// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;

// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;

// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;

// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1264,9 +1260,6 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;

// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;

struct
{
bool supported;
@@ -1348,8 +1341,12 @@ struct uvm_parent_gpu_struct
// GPUs.
NvU64 base_address;
} egm;

uvm_test_parent_gpu_inject_error_t test;
};

NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr);

static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->name;
@@ -1395,10 +1392,10 @@ typedef struct
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
// GPU must be referred from the original GPU's peer_gpu_mask reference.
// The fields will not change while the lock is held, but they may no
// longer be valid because the other GPU might be in teardown.

// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1510,7 +1507,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);

// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock. This is a narrower-purpose
// that the caller is holding the global_lock. This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1521,6 +1518,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out);

// Retain a gpu which is known to already be retained. Does NOT require the
@@ -1578,10 +1576,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);

// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
@@ -1655,7 +1649,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_

// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);

@@ -1684,24 +1679,25 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);

// Map size bytes of contiguous sysmem on the GPU for physical access
// Map size bytes of contiguous sysmem on the GPU for physical access.
//
// size has to be aligned to PAGE_SIZE.
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// the GPU. This address is usable by any GPU under the same parent for the
// lifetime of that parent.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);

// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
}

static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
@@ -1712,16 +1708,15 @@ static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dm
// Allocate and map a page of system DMA memory on the GPU for physical access
//
// Returns
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter.
// - the address of allocated memory in CPU virtual address space.
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter. This address is usable by any GPU
// under the same parent for the lifetime of that parent.
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out);

// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// with uvm_gpu_dma_alloc_page().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address);

// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1742,8 +1737,6 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

bool uvm_platform_uses_canonical_form_address(void);

// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@@ -1786,7 +1779,7 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
}

uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu);

// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU

File diff suppressed because it is too large
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,11 +27,11 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"

NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);

void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);

void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);

@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All parent_gpu's notification buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);

// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);

// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);

// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);

// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
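The set_ignore semantics described above count ignore requests rather than tracking a boolean, so nested suspend/resume (or test-ioctl) pairs balance out and interrupts are only re-enabled when the count returns to zero. A minimal user-space model of that counting; all names are illustrative, not UVM symbols, and callers are assumed to balance their requests:

/* Model of refcounted ignore/unignore for interrupt gating. */
#include <stdbool.h>
#include <stdio.h>

static unsigned ignore_count_model;
static bool intr_enabled_model = true;

static void set_ignore_model(bool do_ignore)
{
    if (do_ignore) {
        ignore_count_model++;
        intr_enabled_model = false;  /* leave notifications in the HW buffer */
    }
    else {
        if (--ignore_count_model == 0)
            intr_enabled_model = true;  /* re-evaluate interrupt conditions */
    }
}

int main(void)
{
    set_ignore_model(true);   /* e.g. the suspend path */
    set_ignore_model(true);   /* e.g. a test ioctl */
    set_ignore_model(false);
    printf("interrupts enabled: %d\n", intr_enabled_model);  /* still 0 */
    set_ignore_model(false);
    printf("interrupts enabled: %d\n", intr_enabled_model);  /* back to 1 */
    return 0;
}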
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);

NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);

#endif // __UVM_GPU_ACCESS_COUNTERS_H__

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
return 1;
}

static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

// On Volta, accessCntrBufferCount is > 0, but we don't support access
// counters in UVM (access_counters_supported is cleared during HAL
// initialization.) This check prevents the top-half from accessing
// unallocated memory.
if (!parent_gpu->access_counters_supported)
return 0;

if (parent_gpu->isr.is_suspended)
return 0;

if (!parent_gpu->isr.access_counters.handling_ref_count)
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
return 0;

if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
return 0;

if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
return 0;
}

nv_kref_get(&parent_gpu->gpu_kref);

// Interrupts need to be disabled to avoid an interrupt storm
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);

nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);

return 1;
}

// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
// did, back to RM, via the return code:
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
// UVM is given a chance to handle the interrupt, before most of the RM
// processing. UVM communicates what it did, back to RM, via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
// UVM did not schedule a bottom half, because it was unable to get the
// locks it needed, but there is still UVM work to be done. RM will
// return "not handled" to the Linux kernel, *unless* RM handled other
// faults in its top half. In that case, the fact that UVM did not
// handle its interrupt is lost. However, life and interrupt processing
// continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining
// (GET != PUT in the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
// UVM did not find any work to do. Currently this is handled in RM in
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
// However, the extra precision is available for the future. RM's
// interrupt handling tends to evolve as new chips and new interrupts
// get created.

static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status = NV_OK;
NvU32 i;

if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
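The return-code contract documented above can be summarized in a few lines. The following user-space sketch models it: NV_OK when at least one bottom half was scheduled, NV_WARN_MORE_PROCESSING_REQUIRED when work remains but locks were contended, NV_ERR_NO_INTR_PENDING when there was nothing to do. The enum values are stand-ins for the real NV_STATUS codes.

/* Model of the top-half status selection. */
#include <stdio.h>

typedef enum {
    MODEL_OK,
    MODEL_WARN_MORE_PROCESSING_REQUIRED,
    MODEL_ERR_NO_INTR_PENDING,
} model_status_t;

static model_status_t top_half_model(unsigned handlers_scheduled, int work_pending)
{
    if (handlers_scheduled > 0)
        return MODEL_OK;
    if (work_pending)
        return MODEL_WARN_MORE_PROCESSING_REQUIRED;  /* locks were contended */
    return MODEL_ERR_NO_INTR_PENDING;
}

int main(void)
{
    printf("%d\n", top_half_model(2, 1));  /* handled an interrupt */
    printf("%d\n", top_half_model(0, 1));  /* GPU will re-raise; retry later */
    printf("%d\n", top_half_model(0, 0));  /* nothing to do */
    return 0;
}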
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);

// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
// Now that we got a GPU object, lock it so that it can't be removed without
// us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

++parent_gpu->isr.interrupt_count;

num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);

if (num_handlers_scheduled == 0) {
if (parent_gpu->isr.is_suspended)
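The per-buffer scheduling above follows a trylock pattern: take the service lock without blocking, bail out and release it if there is no pending work, otherwise keep it held for the bottom half. A user-space model of that pattern, with a pthread mutex standing in for the service semaphore:

/* Model of the trylock-then-schedule pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t service_lock_model = PTHREAD_MUTEX_INITIALIZER;

static unsigned schedule_handler_model(bool work_pending)
{
    if (pthread_mutex_trylock(&service_lock_model) != 0)
        return 0;                  /* a bottom half is already running */

    if (!work_pending) {
        pthread_mutex_unlock(&service_lock_model);
        return 0;
    }

    /* The real code disables the buffer's interrupt here and queues the
     * bottom half, which releases service_lock when it finishes. */
    return 1;
}

int main(void)
{
    printf("scheduled: %u\n", schedule_handler_model(true));
    printf("scheduled: %u\n", schedule_handler_model(true));  /* trylock fails */
    pthread_mutex_unlock(&service_lock_model);                /* bottom half done */
    return 0;
}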
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}

static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_va_block_context_t *block_context;

UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);

status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
return status;
}

if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
return NV_ERR_NO_MEMORY;

block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;

nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);

// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;

if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
return NV_ERR_NO_MEMORY;

parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;

return NV_OK;
}

NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;

parent_gpu->isr.replayable_faults.handling = true;

@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;

parent_gpu->isr.non_replayable_faults.handling = true;

@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}

if (parent_gpu->access_counters_supported) {
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
NvU32 notif_buf_index;

UVM_ASSERT(index_count > 0);

if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
return NV_ERR_NO_MEMORY;

if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
return NV_ERR_NO_MEMORY;

parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
if (!parent_gpu->isr.access_counters)
return NV_ERR_NO_MEMORY;

for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK)
return status;
}

block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
block_context;

nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);

// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}

@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
if (parent_gpu->isr.access_counters) {
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
}
|
||||
}
|
||||
|
||||
// Now that the GPU is safely out of the global table, lock the GPU and mark
|
||||
// it as no longer handling interrupts so the top half knows not to schedule
|
||||
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
// It is safe to deinitialize access counters even if they have not been
|
||||
// successfully initialized.
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
block_context =
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
            // It is safe to deinitialize access counters even if they have not
            // been successfully initialized.
            uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);

            if (parent_gpu->access_counter_buffer) {
                uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];

                block_context = access_counter->batch_service_context.block_service_context.block_context;
                uvm_va_block_context_free(block_context);
            }

            if (parent_gpu->isr.access_counters)
                uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
        }

        uvm_kvfree(parent_gpu->isr.access_counters);
        uvm_kvfree(parent_gpu->access_counter_buffer);
    }

    if (parent_gpu->non_replayable_faults_supported) {
        block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
        block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
        uvm_va_block_context_free(block_context);

        uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
    }

    block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
    block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
    uvm_va_block_context_free(block_context);

    uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
    uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
    uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}

uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)

static void access_counters_isr_bottom_half(void *args)
{
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
    uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;
    unsigned int cpu;

    UVM_ASSERT(parent_gpu->access_counters_supported);
    UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

    uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
    uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);

    // Multiple bottom halves for counter notifications can be running
    // concurrently, but only one can be running this function for a given GPU
    // since we enter with the access_counters_isr_lock held.
    // concurrently, but only one per-notification-buffer (i.e.,
    // notif_buf_index) can be running this function for a given GPU since we
    // enter with the per-notification-buffer access_counters_isr_lock held.
    cpu = get_cpu();
    ++parent_gpu->isr.access_counters.stats.bottom_half_count;
    cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
    ++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
    ++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
    cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
    ++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
    put_cpu();

    uvm_parent_gpu_service_access_counters(parent_gpu);
    uvm_service_access_counters(access_counters);

    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
    uvm_access_counters_isr_unlock(access_counters);

    uvm_parent_gpu_kref_put(parent_gpu);
}
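The hunk above changes the bottom half's argument from the parent GPU to the per-notification-buffer object, and everything else (parent pointer, buffer index, per-buffer stats) is derived from it. The following stand-alone sketch illustrates the shape of that pattern; all names are hypothetical stand-ins, not the UVM types:

struct example_parent;

// One of these exists per notification buffer; it carries everything the
// bottom half needs, so the queue item can point at the buffer directly.
// (Hypothetical types, for illustration only.)
struct example_buffer {
    struct example_parent *parent;        // back-pointer to the shared parent state
    unsigned index;                       // which notification buffer this is
    unsigned long long bottom_half_count; // per-buffer statistic
};

// The queue callback receives the per-buffer object, not the parent.
static void example_bottom_half(void *args)
{
    struct example_buffer *buf = args;

    // Per-buffer state is updated without indexing a global array; the
    // parent is still reachable for shared state (locks, ref counts, ...).
    ++buf->bottom_half_count;
    (void)buf->parent;
    (void)buf->index;
}

Compared with passing the parent alone, this keeps concurrent bottom halves for different buffers from sharing one stats structure, which is what the [notif_buf_index] indexing above enforces.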
@@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
        // clear_replayable_faults is a no-op for architectures that don't
        // support pulse-based interrupts.
        parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
                                                              parent_gpu->fault_buffer_info.replayable.cached_get);
                                                              parent_gpu->fault_buffer.replayable.cached_get);
    }

    // This unlock call has to be out-of-order unlock due to interrupts_lock
@@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gp
    uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}

void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
{
    // See comments in uvm_parent_gpu_replayable_faults_isr_lock
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
    uvm_access_counters_intr_disable(access_counters);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

    uvm_down(&parent_gpu->isr.access_counters.service_lock);
    uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
}

void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;
    uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;

    UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);

    // See comments in uvm_parent_gpu_replayable_faults_isr_unlock

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
    uvm_access_counters_intr_enable(access_counters);

    if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
        parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
                                                                                  parent_gpu->access_counter_buffer_info.cached_get);
    }
    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
        ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);

    // This unlock call has to be out-of-order unlock due to interrupts_lock
    // still being held. Otherwise, it would result in a lock order violation.
    uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
    uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
@@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent
    parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}

void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;

    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);

    // The read of handling_ref_count could race with a write from
@@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
    // ISR lock. But those functions are invoked with the interrupt disabled
    // (disable_intr_ref_count > 0), so the check always returns false when the
    // race occurs
    if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
        parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
        parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
    }

    ++parent_gpu->isr.access_counters.disable_intr_ref_count;
    ++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
}

void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;

    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
    UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
    UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);

    --parent_gpu->isr.access_counters.disable_intr_ref_count;
    --parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;

    if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
        parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
        parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
    }
}
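For readers skimming the diff, here is a minimal, self-contained sketch of the ref-counting scheme these two functions implement. The names are hypothetical and the hardware accesses are stubbed; only the 0 -> 1 disable transition and the 1 -> 0 enable transition touch the hardware, so nested disable/enable pairs are cheap and safe:

// Hypothetical per-buffer interrupt state, for illustration only.
typedef struct {
    unsigned handling_ref_count;     // nonzero while the buffer is being handled
    unsigned disable_intr_ref_count; // nested disable requests
} example_intr_state_t;

static void hw_disable(void) { /* write to an interrupt-disable register */ }
static void hw_enable(void)  { /* write to an interrupt-enable register */ }

static void example_intr_disable(example_intr_state_t *s)
{
    // Only the first disable request actually touches the hardware.
    if (s->handling_ref_count > 0 && s->disable_intr_ref_count == 0)
        hw_disable();
    ++s->disable_intr_ref_count;
}

static void example_intr_enable(example_intr_state_t *s)
{
    --s->disable_intr_ref_count;
    // Only the last enable request re-arms the hardware.
    if (s->handling_ref_count > 0 && s->disable_intr_ref_count == 0)
        hw_enable();
}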

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2024 NVIDIA Corporation
    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct

    struct
    {
        // Number of the bottom-half invocations for this interrupt on a GPU over
        // its lifetime
        // Number of the bottom-half invocations for this interrupt on a GPU
        // over its lifetime.
        NvU64 bottom_half_count;

        // A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
    // bottom-half per interrupt type.
    nv_kthread_q_t bottom_half_q;

    // Protects the state of interrupts (enabled/disabled) and whether the GPU is
    // currently handling them. Taken in both interrupt and process context.
    // Protects the state of interrupts (enabled/disabled) and whether the GPU
    // is currently handling them. Taken in both interrupt and process context.
    uvm_spinlock_irqsave_t interrupts_lock;

    uvm_intr_handler_t replayable_faults;
    uvm_intr_handler_t non_replayable_faults;
    uvm_intr_handler_t access_counters;
    uvm_intr_handler_t *access_counters;

    // Kernel thread used to kill channels on fatal non-replayable faults.
    // This is needed because we cannot call into RM from the bottom-half to
    // avoid deadlocks.
    nv_kthread_q_t kill_channel_q;

    // Number of top-half ISRs called for this GPU over its lifetime
    // Number of top-half ISRs called for this GPU over its lifetime.
    NvU64 interrupt_count;
} uvm_isr_info_t;

@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);

// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);

@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);

// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);

// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);

// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);

// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);

// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);

#endif // __UVM_GPU_ISR_H__
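A hedged usage sketch of the locking discipline this header's comments describe, using plain Linux primitives as stand-ins for the UVM wrappers (uvm_spin_lock_irqsave, uvm_down, uvm_up_out_of_order): interrupts are quiesced under the interrupts lock before the sleepable service lock is taken, and on unlock the semaphore is released while the spinlock is still held, which is the "out-of-order" unlock the .c file calls out explicitly. All structure and function names here are hypothetical:

#include <linux/spinlock.h>
#include <linux/semaphore.h>

// Hypothetical container mirroring the isr lock/unlock pattern above.
struct example_isr_state {
    spinlock_t interrupts_lock;    // protects HW interrupt enable state
    struct semaphore service_lock; // serializes servicing; held while sleeping
};

static void example_hw_intr_disable(struct example_isr_state *s) { }
static void example_hw_intr_enable(struct example_isr_state *s) { }

// Quiesce the interrupt first so the top half stops scheduling bottom halves,
// then take the (sleepable) service lock outside the spinlock.
static void example_isr_lock(struct example_isr_state *s)
{
    unsigned long flags;

    spin_lock_irqsave(&s->interrupts_lock, flags);
    example_hw_intr_disable(s);
    spin_unlock_irqrestore(&s->interrupts_lock, flags);

    down(&s->service_lock);
}

// Re-enable under the spinlock, and release the semaphore before dropping the
// spinlock; up() never sleeps, so this is legal, but a lock tracker that
// expects unlocks in reverse acquisition order sees it as out-of-order, hence
// the dedicated helper in the real code.
static void example_isr_unlock(struct example_isr_state *s)
{
    unsigned long flags;

    spin_lock_irqsave(&s->interrupts_lock, flags);
    example_hw_intr_enable(s);
    up(&s->service_lock);
    spin_unlock_irqrestore(&s->interrupts_lock, flags);
}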

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2024 NVIDIA Corporation
    Copyright (c) 2017-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,18 +119,18 @@
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;

    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

    non_replayable_faults->shadow_buffer_copy = NULL;
    non_replayable_faults->fault_cache = NULL;

    non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
    non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
                                        parent_gpu->fault_buffer_hal->entry_size(parent_gpu);

    non_replayable_faults->shadow_buffer_copy =
        uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
        uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
    if (!non_replayable_faults->shadow_buffer_copy)
        return NV_ERR_NO_MEMORY;

@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_

void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;

    if (non_replayable_faults->fault_cache) {
        UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)

    UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);

    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
                                                         &has_pending_faults);
    UVM_ASSERT(status == NV_OK);

@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
    NV_STATUS status;
    NvU32 i;
    NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
    uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
                                                  current_hw_entry,
                                                  cached_faults);

@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
    uvm_gpu_t *gpu = user_channel->gpu;
    NV_STATUS status;
    uvm_push_t push;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;

    UVM_ASSERT(!fault_entry->is_fatal);

@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
    uvm_processor_id_t new_residency;
    bool read_duplicate;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
    const uvm_va_policy_t *policy;

    UVM_ASSERT(!fault_entry->is_fatal);
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
    NV_STATUS status, tracker_status;
    uvm_va_block_retry_t va_block_retry;
    uvm_gpu_t *gpu = fault_entry->gpu;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;

    service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
    service_context->num_retries = 0;
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
                                                       service_context,
                                                       hmm_migratable));

    tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
    tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
                                                  &va_block->tracker);

    uvm_mutex_unlock(&va_block->lock);
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
{
    uvm_va_space_t *va_space = fault_entry->va_space;
    uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
                   (fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));

@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
    uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
    NV_STATUS status = lookup_status;
    NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@@ -588,7 +588,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,

        ats_invalidate->tlb_batch_pending = false;

        va_range_next = uvm_va_space_iter_first(va_space, fault_entry->fault_address, ~0ULL);
        va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_entry->fault_address);

        // The VA isn't managed. See if ATS knows about it.
        vma = find_vma_intersection(mm, fault_address, fault_address + 1);
@@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
    struct mm_struct *mm;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_gpu_t *gpu;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;

    status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@@ -757,7 +757,7 @@ exit_no_channel:
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
{
    uvm_service_block_context_t *service_context =
        &parent_gpu->fault_buffer_info.non_replayable.block_service_context;
        &parent_gpu->fault_buffer.non_replayable.block_service_context;
    NV_STATUS status;
    bool hmm_migratable = true;

@@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent
    // non-replayable faults since getting multiple faults on the same
    // memory region is not very likely
    for (i = 0; i < cached_faults; ++i) {
        status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
        status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
        if (status != NV_OK)
            return;
    }

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
// the power management resume path.
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    // Read the current get/put pointers, as this might not be the first time
    // we take control of the fault buffer since the GPU was initialized,
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
    replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);

    // (Re-)enable fault prefetching
    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
    if (parent_gpu->fault_buffer.prefetch_faults_enabled)
        parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
    else
        parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status = NV_OK;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
    UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
               parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);

    replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
    replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
                                    parent_gpu->fault_buffer_hal->entry_size(parent_gpu);

    // Check provided module parameter value
    parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
                                                       (NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
    parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
                                                       replayable_faults->max_faults);
    parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
                                                  (NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
    parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
                                                  replayable_faults->max_faults);

    if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
        pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_fault_batch_count,
                UVM_PERF_FAULT_BATCH_COUNT_MIN,
                replayable_faults->max_faults,
                parent_gpu->fault_buffer_info.max_batch_size);
    if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
        UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
                       uvm_parent_gpu_name(parent_gpu),
                       uvm_perf_fault_batch_count,
                       UVM_PERF_FAULT_BATCH_COUNT_MIN,
                       replayable_faults->max_faults,
                       parent_gpu->fault_buffer.max_batch_size);
    }

    batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gp
                                       UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;

    if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
        pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_fault_replay_policy,
                replayable_faults->replay_policy);
        UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
                       uvm_parent_gpu_name(parent_gpu),
                       uvm_perf_fault_replay_policy,
                       replayable_faults->replay_policy);
    }

    replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
    if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
        pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_fault_replay_update_put_ratio,
                replayable_faults->replay_update_put_ratio);
        UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
                       uvm_parent_gpu_name(parent_gpu),
                       uvm_perf_fault_replay_update_put_ratio,
                       replayable_faults->replay_update_put_ratio);
    }

    // Re-enable fault prefetching just in case it was disabled in a previous run
    parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
    parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;

    fault_buffer_reinit_replayable_faults(parent_gpu);

@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gp

static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    if (batch_context->fault_cache) {
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
        uvm_tracker_deinit(&replayable_faults->replay_tracker);
    }

    if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
    if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
        // Re-enable prefetch faults in case we disabled them
        if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
        if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
            parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
    }

@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->replayable_faults_supported);

    status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
                                                            &parent_gpu->fault_buffer_info.rm_info));
                                                            &parent_gpu->fault_buffer.rm_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
        // when it returns an error. Set the buffer handle to zero as it is
        // used by the deinitialization logic to determine if it was correctly
        // initialized.
        parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
        parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
        goto fail;
    }

@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)

    fault_buffer_deinit_replayable_faults(parent_gpu);

    if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
    if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
        status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
        UVM_ASSERT(status == NV_OK);

        uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
                                                               &parent_gpu->fault_buffer_info.rm_info));
                                                               &parent_gpu->fault_buffer.rm_info));

        parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
        parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
    }
}

bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    UVM_ASSERT(parent_gpu->replayable_faults_supported);

    // Fast path 1: we left some faults unserviced in the buffer in the last pass
    // Fast path 1: we left some faults unserviced in the buffer in the last
    // pass
    if (replayable_faults->cached_get != replayable_faults->cached_put)
        return true;
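The cached_get/cached_put fast path above is a plain ring-buffer emptiness check: the fault buffer is a ring, and faults are pending whenever the cached GET pointer has not caught up with the cached PUT pointer. A minimal sketch with hypothetical types (in the real code the GET/PUT registers are read through the fault buffer HAL shown elsewhere in this diff):

// Hypothetical ring-buffer bookkeeping, for illustration only.
typedef struct {
    unsigned cached_get; // next entry to consume
    unsigned cached_put; // one past the last entry written by HW
    unsigned max_faults; // ring size in entries
} example_ring_t;

static int example_ring_pending(const example_ring_t *r)
{
    // Empty exactly when GET has caught up with PUT.
    return r->cached_get != r->cached_put;
}

static void example_ring_consume(example_ring_t *r)
{
    // Advance GET and wrap at the end of the ring.
    r->cached_get = (r->cached_get + 1) % r->max_faults;
}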

@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;

    UVM_ASSERT(tracker != NULL);

@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
    uvm_gpu_t *gpu = fault_entry->gpu;
    uvm_gpu_phys_address_t pdb;
    uvm_push_t push;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    NvU64 offset;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -452,7 +453,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,

    gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
    UVM_ASSERT(gpu_va_space);
    pdb = uvm_page_tree_pdb(&gpu_va_space->page_tables)->addr;
    pdb = uvm_page_tree_pdb_address(&gpu_va_space->page_tables);

    // Record fatal fault event
    uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, fault_entry, fault_entry->fatal_reason);
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_tracker_t *tracker = NULL;

    if (batch_context)
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,

static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
        return NV_OK;

    is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);

    UVM_ASSERT(status == NV_OK);

@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
    NvU32 get;
    NvU32 put;
    uvm_spin_loop_t spin;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    NV_STATUS status;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
    uvm_fault_buffer_entry_t *fault_cache;
    uvm_spin_loop_t spin;
    NV_STATUS status = NV_OK;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
    const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;

@@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,

    // Parse until get != put and have enough space to cache.
    while ((get != put) &&
           (fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
           (fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
        bool is_same_instance_ptr = true;
        uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
        uvm_fault_utlb_info_t *current_tlb;
@@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
    uvm_page_index_t last_page_index;
    NvU32 page_fault_count = 0;
    uvm_range_group_range_iter_t iter;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
    uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
    uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
    NV_STATUS status;
    uvm_va_block_retry_t va_block_retry;
    NV_STATUS tracker_status;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;

    fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
    uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    bool replay_per_va_block =
        (gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
        (gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);

    UVM_ASSERT(vma);

@@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

        page_index = (fault_address - sub_batch_base) / PAGE_SIZE;

        // Do not check for coalesced access type. If there are multiple different
        // accesses to an address, we can disregard the prefetch one.
        // Do not check for coalesced access type. If there are multiple
        // different accesses to an address, we can disregard the prefetch one.
        if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
            (uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
            uvm_page_mask_set(prefetch_only_fault_mask, page_index);
@@ -1956,19 +1957,19 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
    uvm_va_block_t *va_block;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_va_block_context_t *va_block_context =
        gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
        gpu->parent->fault_buffer.replayable.block_service_context.block_context;
    uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
    struct mm_struct *mm = va_block_context->mm;
    NvU64 fault_address = current_entry->fault_address;

    (*block_faults) = 0;

    va_range_next = uvm_va_space_iter_first(va_space, fault_address, ~0ULL);
    va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_address);
    if (va_range_next && (fault_address >= va_range_next->node.start)) {
        UVM_ASSERT(fault_address < va_range_next->node.end);

        va_range = va_range_next;
        va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
        va_range_next = uvm_va_range_gmmu_mappable_next(va_range);
    }

    if (va_range)
@@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
        NvU64 outer = ~0ULL;

        UVM_ASSERT(replay_per_va_block ==
                   (gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
                   (gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));

        // Limit outer to the minimum of next va_range.start and first
        // fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
    uvm_gpu_t *gpu = batch_context->fatal_gpu;
    uvm_gpu_va_space_t *gpu_va_space = NULL;
    struct mm_struct *mm;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;

    UVM_ASSERT(va_space);
@@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
            ++i;
        }
        else {
            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
            NvU32 block_faults;
            const bool hmm_migratable = true;

@@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
    NvU32 i;
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
    uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
    uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
    struct mm_struct *mm = NULL;
    const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
                                     parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
                                     parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
    uvm_service_block_context_t *service_context =
        &parent_gpu->fault_buffer_info.replayable.block_service_context;
        &parent_gpu->fault_buffer.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;
    bool hmm_migratable = true;

@@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
//    fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
//    try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
    NV_STATUS status;
    NV_STATUS tracker_status;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    bool first = true;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);

    // 1) Disable prefetching to avoid new requests keep coming and flooding
    //    the buffer
    if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
    if (gpu->parent->fault_buffer.prefetch_faults_enabled)
        gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);

    while (1) {
@@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
    }

    // 10) Re-enable prefetching
    if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
    if (gpu->parent->fault_buffer.prefetch_faults_enabled)
        gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);

    if (status == NV_OK)
@@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
    // comment in mark_fault_invalid_prefetch(..).
    // Some tests rely on this logic (and ratio) to correctly disable prefetch
    // fault reporting. If the logic changes, the tests will have to be changed.
    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
    if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
        uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
        ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
        ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
         (uvm_enable_builtin_tests &&
          parent_gpu->rm_info.isSimulated &&
          batch_context->num_invalid_prefetch_faults > 5))) {
        uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
    }
    else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
    else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;

        // Reenable prefetch faults after some time
        if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
@@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
    NvU32 num_batches = 0;
    NvU32 num_throttled = 0;
    NV_STATUS status = NV_OK;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);

    if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
    if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
        parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
        parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
        parent_gpu->fault_buffer.prefetch_faults_enabled = true;
    }
}

@@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);

    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
    if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
        parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
        parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
        parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
        parent_gpu->fault_buffer.prefetch_faults_enabled = false;
        parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
    }
}

@@ -792,7 +792,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
    //
    // Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
    // have been added that are exactly what we need and could be slightly
    // faster on arm and powerpc than the implementation below. But at least in
    // faster on arm than the implementation below. But at least in
    // 4.3 the implementation looks broken for arm32 (it maps directly to
    // smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
    // architectures) so instead of dealing with that just use a slightly bigger
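The comment breaks off at the hunk boundary, but the pattern it describes is a release-style publish and an acquire-style read of a 64-bit completed value, with a full barrier as the "bigger hammer" that sidesteps the arm32 limitation. A hedged sketch of that ordering with standard kernel primitives; this illustrates the idea, not the UVM implementation:

#include <linux/atomic.h>

// Publish a 64-bit value so that all earlier payload writes are visible
// before the value itself (release-like, via a full barrier).
static void example_publish(atomic64_t *v, long long val)
{
    smp_mb();
    atomic64_set(v, val);
}

// Read the value so that all later payload reads happen after the load
// (acquire-like, via a full barrier). atomic64_read() is safe for 64-bit
// loads even on 32-bit architectures, unlike a bare smp_load_acquire().
static long long example_consume(atomic64_t *v)
{
    long long val = atomic64_read(v);

    smp_mb();
    return val;
}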

@@ -217,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
            .clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
            .clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
            .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
            .access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
            .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
            .get_time = uvm_hal_maxwell_get_time,
        }
@@ -254,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
            .replay_faults = uvm_hal_volta_replay_faults,
            .cancel_faults_va = uvm_hal_volta_cancel_faults_va,
            .clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
            .access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
            .access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
            .access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
            .semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
        }
    },
@@ -271,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
            .tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
            .tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
            .tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
            .access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
            .access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
        }
    },
    {
@@ -409,6 +407,32 @@ static uvm_hal_class_ops_t arch_table[] =
    },
};

// chip_table[] is different from the other class op tables - it is used to
// apply chip specific overrides to arch ops. This means unlike the other class
// op tables, parent_id does not refer to a preceding entry within the table
// itself. parent_id is an architecture (not a chip id) and instead refers to an
// entry in arch_table[]. This means that arch_table[] must be initialized
// before chip_table[]. chip_table[] must be initialized using
// ops_init_from_table(arch_table) instead of ops_init_from_parent().
// TODO: BUG 5044266: the chip ops should be separated from the arch ops.
static uvm_hal_class_ops_t chip_table[] =
{
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
        .u.arch_ops = {
            .mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
        }
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
        .u.arch_ops = {
            .mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
        }
    },
};

static uvm_hal_class_ops_t fault_buffer_table[] =
{
    {
@@ -537,22 +561,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
        .u.access_counter_buffer_ops = {
            .enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
            .disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
            .clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
            .parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
            .entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
            .entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
            .entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
        }
        .u.access_counter_buffer_ops = {}
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
        .u.access_counter_buffer_ops = {
            .enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
            .disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
            .clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
            .parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
            .entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
            .entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
            .entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
        }
    },
    {
@@ -675,33 +696,35 @@ static inline void op_copy(uvm_hal_class_ops_t *dst, uvm_hal_class_ops_t *src, N
    memcpy(m_dst, m_src, sizeof(void *));
}

static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
                                             NvU32 row_count,
                                             NvLength op_count,
                                             NvLength op_offset)
static inline NV_STATUS ops_init_from_table(uvm_hal_class_ops_t *dest_table,
                                            NvU32 dest_row_count,
                                            uvm_hal_class_ops_t *src_table,
                                            NvU32 src_row_count,
                                            NvLength op_count,
                                            NvLength op_offset)
{
    NvLength i;

    for (i = 0; i < row_count; i++) {
    for (i = 0; i < dest_row_count; i++) {
        NvLength j;
        uvm_hal_class_ops_t *parent = NULL;

        if (table[i].parent_id != 0) {
            parent = ops_find_by_id(table, i, table[i].parent_id);
        if (dest_table[i].parent_id != 0) {
            parent = ops_find_by_id(src_table, src_row_count, dest_table[i].parent_id);
            if (parent == NULL)
                return NV_ERR_INVALID_CLASS;

            // Go through all the ops and assign from parent's corresponding op
            // if NULL
            for (j = 0; j < op_count; j++) {
                if (op_is_null(table + i, j, op_offset))
                    op_copy(table + i, parent, j, op_offset);
                if (op_is_null(dest_table + i, j, op_offset))
                    op_copy(dest_table + i, parent, j, op_offset);
            }
        }

        // At this point, it is an error to have missing HAL operations
        for (j = 0; j < op_count; j++) {
            if (op_is_null(table + i, j, op_offset))
            if (op_is_null(dest_table + i, j, op_offset))
                return NV_ERR_INVALID_STATE;
        }
    }
@@ -709,6 +732,19 @@ static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
    return NV_OK;
}

static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
                                             NvU32 row_count,
                                             NvLength op_count,
                                             NvLength op_offset)
{
    return ops_init_from_table(table,
                               row_count,
                               table,
                               row_count,
                               op_count,
                               op_offset);
}
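To make the refactor concrete: ops_init_from_table() fills NULL function pointers in a destination row from the row in a (possibly different) source table whose id matches the destination's parent_id, which is what lets chip_table[] inherit from arch_table[] while ops_init_from_parent() keeps its old same-table behavior. Below is a hypothetical, self-contained illustration of that inheritance rule; the struct and op names are stand-ins, not the real uvm_hal_class_ops_t layout:

// Hypothetical ops table row with two function-pointer "ops".
struct example_ops {
    int id;
    int parent_id;
    void (*op_a)(void);
    void (*op_b)(void);
};

static void base_a(void) { }
static void base_b(void) { }
static void derived_b(void) { }

static struct example_ops base_table[] = {
    { .id = 1, .parent_id = 0, .op_a = base_a, .op_b = base_b },
};

static struct example_ops override_table[] = {
    // op_a is left NULL and will be inherited; op_b is an explicit override.
    { .id = 100, .parent_id = 1, .op_b = derived_b },
};

// Tiny resolver mirroring the inheritance rule: copy any NULL op from the
// matching parent row in the source table.
static void example_init_from_table(struct example_ops *dst, int dst_rows,
                                    const struct example_ops *src, int src_rows)
{
    int i, j;

    for (i = 0; i < dst_rows; i++) {
        for (j = 0; j < src_rows; j++) {
            if (src[j].id == dst[i].parent_id) {
                if (!dst[i].op_a)
                    dst[i].op_a = src[j].op_a;
                if (!dst[i].op_b)
                    dst[i].op_b = src[j].op_b;
            }
        }
    }
}

After example_init_from_table(override_table, 1, base_table, 1), override_table[0].op_a points at base_a while the explicit derived_b override is preserved, which is the same resolution uvm_hal_init_gpu() relies on below when it swaps in a chip-specific arch_hal.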
|
||||
|
||||
NV_STATUS uvm_hal_init_table(void)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@@ -737,6 +773,18 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
|
||||
|
||||
// chip_table[] must be initialized after arch_table[].
|
||||
status = ops_init_from_table(chip_table,
|
||||
ARRAY_SIZE(chip_table),
|
||||
arch_table,
|
||||
ARRAY_SIZE(arch_table),
|
||||
ARCH_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.arch_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_table(chip_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(fault_buffer_table,
|
||||
ARRAY_SIZE(fault_buffer_table),
|
||||
FAULT_BUFFER_OP_COUNT,
|
||||
@@ -802,6 +850,13 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->arch_hal = &class_ops->u.arch_ops;
 
+    // Apply per chip overrides if required
+    class_ops = ops_find_by_id(chip_table,
+                               ARRAY_SIZE(chip_table),
+                               gpu_info->gpuArch | gpu_info->gpuImplementation);
+    if (class_ops)
+        parent_gpu->arch_hal = &class_ops->u.arch_ops;
+
     class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
     if (class_ops == NULL) {
         UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n",
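The hunk above resolves the arch-level HAL first, then lets an exact chip match (arch id OR'd with the implementation id) override it. A distilled sketch of that two-step resolution, with stand-in names for ops_find_by_id() and the real tables:

typedef struct { unsigned id; const void *ops; } hal_row_t;

static const hal_row_t *find_row(const hal_row_t *table, size_t rows, unsigned id)
{
    for (size_t i = 0; i < rows; i++) {
        if (table[i].id == id)
            return &table[i];
    }
    return NULL;
}

static const void *resolve_arch_hal(const hal_row_t *arch_table, size_t arch_rows,
                                    const hal_row_t *chip_table, size_t chip_rows,
                                    unsigned gpu_arch, unsigned gpu_impl)
{
    const hal_row_t *row = find_row(arch_table, arch_rows, gpu_arch);
    const hal_row_t *chip = find_row(chip_table, chip_rows, gpu_arch | gpu_impl);
    const void *ops = row ? row->ops : NULL;

    if (chip)   // a chip entry, when present, wins over the generic arch entry
        ops = chip->ops;
    return ops;
}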
@@ -843,10 +898,14 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
     // Computing.
     //
     // TODO: Bug 200692962: Add support for access counters in vGPU
-    if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
+    if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
         parent_gpu->access_counters_supported = false;
-        parent_gpu->access_counters_can_use_physical_addresses = false;
-    }
+
+    // TODO: Bug 4637114: [UVM] Remove support for physical access counter
+    // notifications. Always set to false, until we remove the PMM reverse
+    // mapping code.
+    parent_gpu->access_counters_can_use_physical_addresses = false;
 }
 
 void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -1042,36 +1101,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
     UVM_DBG_PRINT("    timestamp              %llu\n", entry->timestamp);
 }
 
-const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
-{
-    BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
-
-    switch (access_counter_type) {
-        UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
-        UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
-        UVM_ENUM_STRING_DEFAULT();
-    }
-}
-
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
 {
-    if (!entry->address.is_virtual) {
-        UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
-                      entry->address.address,
-                      uvm_aperture_string(entry->address.aperture));
-    }
-    else {
-        UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
-        UVM_DBG_PRINT("    instance_ptr           {0x%llx:%s}\n",
-                      entry->virtual_info.instance_ptr.address,
-                      uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
-        UVM_DBG_PRINT("    mmu_engine_type        %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
-        UVM_DBG_PRINT("    mmu_engine_id          %u\n", entry->virtual_info.mmu_engine_id);
-        UVM_DBG_PRINT("    ve_id                  %u\n", entry->virtual_info.ve_id);
-    }
-
-    UVM_DBG_PRINT("    is_virtual             %u\n", entry->address.is_virtual);
-    UVM_DBG_PRINT("    counter_type           %s\n", uvm_access_counter_type_string(entry->counter_type));
+    UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
+    UVM_DBG_PRINT("    instance_ptr           {0x%llx:%s}\n",
+                  entry->instance_ptr.address,
+                  uvm_aperture_string(entry->instance_ptr.aperture));
+    UVM_DBG_PRINT("    mmu_engine_type        %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
+    UVM_DBG_PRINT("    mmu_engine_id          %u\n", entry->mmu_engine_id);
+    UVM_DBG_PRINT("    ve_id                  %u\n", entry->ve_id);
     UVM_DBG_PRINT("    counter_value          %u\n", entry->counter_value);
     UVM_DBG_PRINT("    subgranularity         0x%08x\n", entry->sub_granularity);
     UVM_DBG_PRINT("    bank                   %u\n", entry->bank);
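The removed uvm_access_counter_type_string() leaned on the UVM_ENUM_STRING_CASE()/UVM_ENUM_STRING_DEFAULT() helpers. For orientation, one plausible definition of this common stringify idiom (not necessarily the driver's exact macros):

#define ENUM_STRING_CASE(e)    case e: return #e
#define ENUM_STRING_DEFAULT()  default: return "UNKNOWN"

typedef enum { COUNTER_MIMC, COUNTER_MOMC } counter_type_t;

static const char *counter_type_string(counter_type_t t)
{
    switch (t) {
        ENUM_STRING_CASE(COUNTER_MIMC);   // expands to: case COUNTER_MIMC: return "COUNTER_MIMC"
        ENUM_STRING_CASE(COUNTER_MOMC);
        ENUM_STRING_DEFAULT();
    }
}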
@@ -494,6 +494,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size);
 
 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
@@ -686,54 +687,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
 
 // Access counters
-typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
-typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
-typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
+typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
+typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
 
 // Parse the entry on the given buffer index. This also clears the valid bit of
 // the entry in the buffer.
-typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
+typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
                                                             NvU32 index,
                                                             uvm_access_counter_buffer_entry_t *buffer_entry);
-typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
+                                                               NvU32 index);
+typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
+                                                                  NvU32 index);
 typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
-typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
 typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
                                                         const uvm_access_counter_buffer_entry_t *buffer_entry);
 
-void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                    NvU32 get);
+void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
                                                                    NvU32 index,
                                                                    uvm_access_counter_buffer_entry_t *buffer_entry);
-bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                      NvU32 index);
+void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                         NvU32 index);
 NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
-void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
 void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                                const uvm_access_counter_buffer_entry_t *buffer_entry);
 
-void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
-                                                     NvU32 index,
-                                                     uvm_access_counter_buffer_entry_t *buffer_entry);
-bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
-
-void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
-void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
-void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
-                                                 const uvm_access_counter_buffer_entry_t *buffer_entry);
-
-void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
+void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
+                                                      NvU32 index,
+                                                      uvm_access_counter_buffer_entry_t *buffer_entry);
+bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
+void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
 NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
 void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
                                                   const uvm_access_counter_buffer_entry_t *buffer_entry);
 
 // The source and destination addresses must be 16-byte aligned. Note that the
 // best performance is achieved with 256-byte alignment. The decrypt size must
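The uvm_access_counter_buffer_t handle threaded through the new signatures is consumed as access_counters->parent_gpu and access_counters->index in the Maxwell stubs later in this diff. A minimal sketch of the shape those two uses imply; the real struct certainly carries more state (mapped buffer pages, get/put pointers, and so on), and any field beyond these two is an assumption:

// Hypothetical reduction of uvm_access_counter_buffer_t to the fields this
// diff visibly relies on.
typedef struct uvm_access_counter_buffer_struct
{
    uvm_parent_gpu_t *parent_gpu;  // GPU that owns this notification buffer
    NvU32 index;                   // buffer index (Blackwell+ can have several)
} uvm_access_counter_buffer_t;

Passing this handle instead of the bare uvm_parent_gpu_t lets one GPU expose multiple notification buffers without widening every HAL entry point with an extra index parameter.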
@@ -786,7 +785,6 @@ struct uvm_host_hal_struct
     uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
     uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
     uvm_hal_access_counter_clear_all_t access_counter_clear_all;
-    uvm_hal_access_counter_clear_type_t access_counter_clear_type;
     uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
     uvm_hal_get_time_t get_time;
 };
@@ -866,7 +864,8 @@ struct uvm_sec2_hal_struct
 
 typedef struct
 {
-    // id is either a hardware class or GPU architecture
+    // TODO: BUG 5044266: the chip ops should be separated from the arch ops.
+    // id is either a hardware class, a chip or a GPU architecture
     NvU32 id;
     NvU32 parent_id;
     union
@@ -877,7 +876,7 @@ typedef struct
         // ce_ops: id is a hardware class
         uvm_ce_hal_t ce_ops;
 
-        // arch_ops: id is an architecture
+        // arch_ops: id is an architecture or a chip
        uvm_arch_hal_t arch_ops;
 
         // fault_buffer_ops: id is an architecture
@@ -471,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
     return max(membar_1, membar_2);
 }
 
-typedef enum
-{
-    UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
-    UVM_ACCESS_COUNTER_TYPE_MOMC,
-
-    UVM_ACCESS_COUNTER_TYPE_MAX,
-} uvm_access_counter_type_t;
-
-const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
-
 struct uvm_access_counter_buffer_entry_struct
 {
-    // Whether this counter refers to outbound accesses to remote GPUs or
-    // sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
-    // GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
-    uvm_access_counter_type_t counter_type;
-
     // Address of the region for which a notification was sent
-    uvm_gpu_address_t address;
+    NvU64 address;
 
-    union
-    {
-        // These fields are only valid if address.is_virtual is true
-        struct
-        {
-            // Instance pointer of one of the channels in the TSG that triggered
-            // the notification.
-            uvm_gpu_phys_address_t instance_ptr;
+    // Instance pointer of one of the channels in the TSG that triggered
+    // the notification.
+    uvm_gpu_phys_address_t instance_ptr;
 
-            uvm_mmu_engine_type_t mmu_engine_type;
+    uvm_mmu_engine_type_t mmu_engine_type;
 
-            NvU32 mmu_engine_id;
+    NvU32 mmu_engine_id;
 
-            // Identifier of the subcontext that performed the memory accesses
-            // that triggered the notification. This value, combined with the
-            // instance_ptr, is needed to obtain the GPU VA space of the process
-            // that triggered the notification.
-            NvU32 ve_id;
+    // Identifier of the subcontext that performed the memory accesses
+    // that triggered the notification. This value, combined with the
+    // instance_ptr, is needed to obtain the GPU VA space of the process
+    // that triggered the notification.
+    NvU32 ve_id;
 
-            // VA space for the address that triggered the notification
-            uvm_va_space_t *va_space;
-        } virtual_info;
+    // VA space for the address that triggered the notification
+    uvm_va_space_t *va_space;
 
-        // These fields are only valid if address.is_virtual is false
-        struct
-        {
-            // Processor id where data is resident
-            //
-            // Although this information is not tied to a VA space, we can use
-            // a regular processor id because P2P is not allowed between
-            // partitioned GPUs.
-            uvm_processor_id_t resident_id;
-
-        } physical_info;
-    };
-
-    // This is the GPU that triggered the notification. Note that physical
-    // address based notifications are only supported on non-MIG-capable GPUs.
+    // This is the GPU that triggered the notification.
     uvm_gpu_t *gpu;
 
     // Number of times the tracked region was accessed since the last time it
     // was cleared. Counter values saturate at the maximum value supported by
-    // the GPU (2^16 - 1 in Volta)
+    // the GPU (2^16 - 1 on Turing)
     NvU32 counter_value;
 
     // When the granularity of the tracked regions is greater than 64KB, the
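With physical notifications gone, every entry is virtual, so the tagged union in the old layout no longer earns its keep. A toy before/after sketch of that layout change, using stand-in types rather than the driver's:

// 570.x style: a tag plus a union; only one arm is valid at a time.
typedef struct {
    int is_virtual;                                  // tag
    unsigned long long address;
    union {
        struct { unsigned ve_id; } virtual_info;     // valid if is_virtual
        struct { int resident_id; } physical_info;   // valid if !is_virtual
    };
} old_entry_t;

// 575.x style: the physical arm is removed, the fields flatten, and the
// tag becomes unnecessary.
typedef struct {
    unsigned long long address;                      // always a virtual address
    unsigned ve_id;
} new_entry_t;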
@@ -34,8 +34,9 @@ MODULE_PARM_DESC(uvm_disable_hmm,
                  "enabled if is not supported in this driver build "
                  "configuration, or if ATS settings conflict with HMM.");
 #else
-// So far, we've only tested HMM on x86_64, so disable it by default everywhere
-// else.
+// TODO: Bug 4103580: UVM: HMM: implement HMM support on ARM64 (aarch64)
+// So far, we've only tested HMM on x86_64 and aarch64 and it is broken on
+// aarch64 so disable it by default everywhere except x86_64.
 static bool uvm_disable_hmm = true;
 MODULE_PARM_DESC(uvm_disable_hmm,
                  "Force-disable HMM functionality in the UVM driver. "
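For reference, the uvm_disable_hmm flag above pairs with a module_param() registration elsewhere in the file. A self-contained sketch of that standard kernel pattern; the 0444 permission bits are illustrative, not necessarily what nvidia-uvm uses:

#include <linux/module.h>
#include <linux/moduleparam.h>

static bool uvm_disable_hmm = true;

// Exposes /sys/module/<module>/parameters/uvm_disable_hmm and makes the flag
// settable at load time, e.g.: insmod nvidia-uvm.ko uvm_disable_hmm=0
module_param(uvm_disable_hmm, bool, 0444);
MODULE_PARM_DESC(uvm_disable_hmm,
                 "Force-disable HMM functionality in the UVM driver.");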
@@ -186,7 +187,7 @@ static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_pa
     if (status != NV_OK)
         goto out;
 
-    status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
+    status = uvm_gpu_map_cpu_page(gpu, dst_page, &dma_addr);
     if (status != NV_OK)
         goto out_unmap_gpu;
 
@@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
 
-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
+    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
     {
         uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
-                   (sizeof(dummy->fault_source.utlb_id) * 8)));
+        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
     }
 
     // A single top level PDE on Hopper covers 64 PB and that's the minimum
@@ -97,10 +95,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->non_replayable_faults_supported = true;
 
-    parent_gpu->access_counters_supported = true;
-
-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
     parent_gpu->fault_cancel_va_supported = true;
 
     parent_gpu->scoped_atomics_supported = true;
 
@@ -393,9 +393,13 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
 bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
     uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
 
-    if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
+    if (uvm_gpu_address_is_peer(gpu, src)) {
+        UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
+        return false;
+    }
+
+    if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
         UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
                       src.address,
                       dst.address);
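Distilled to its decision logic, the validity check above splits the old single peer-copy test in two: copies *from* a peer address are rejected outright, and copies *to* a peer address must run on a channel reserved for p2p work. A standalone sketch with stand-in predicates:

typedef struct { int from_peer; int to_peer; int on_p2p_channel; } copy_req_t;

static int memcopy_is_valid(const copy_req_t *req)
{
    if (req->from_peer)
        return 0;  // reading through a peer mapping is not allowed
    if (req->to_peer && !req->on_p2p_channel)
        return 0;  // peer writes must use the designated p2p channels
    return 1;
}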
@@ -212,7 +212,13 @@ static NvU64 make_pte_hopper(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
 
 static NvU64 make_sked_reflected_pte_hopper(void)
 {
+    // On discrete GPUs, SKED Reflected PTEs may use either the local aperture
+    // or the system non coherent aperture. However, integrated GPUs may only
+    // use the system non-coherent aperture. We always use the system
+    // non-coherent aperture as that is common to both discrete and integrated
+    // GPUs.
     return HWCONST64(_MMU_VER3, PTE, VALID, TRUE) |
            HWCONST64(_MMU_VER3, PTE, APERTURE, SYSTEM_NON_COHERENT_MEMORY) |
            HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(UVM_PROT_READ_WRITE_ATOMIC, UVM_MMU_PTE_FLAGS_NONE)) |
            HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
 }
@@ -323,11 +329,6 @@ static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir,
     if (!g_uvm_global.ats.enabled)
         return pcf[pde_type][ATS_ALLOWED];
 
-    // We assume all supported ATS platforms use canonical form address.
-    // See comments in uvm_gpu.c:uvm_gpu_can_address() and in
-    // uvm_mmu.c:page_tree_ats_init();
-    UVM_ASSERT(uvm_platform_uses_canonical_form_address());
-
     // Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
     // ATS and GMMU page tables. For managed memory we need to prevent this
     // parallel lookup since we would not get any GPU fault if the CPU has
@@ -526,25 +526,6 @@ typedef struct
     NV_STATUS rmStatus; // OUT
 } UVM_MEM_MAP_PARAMS;
 
-//
-// UvmDebugAccessMemory
-//
-#define UVM_DEBUG_ACCESS_MEMORY UVM_IOCTL_BASE(36)
-
-typedef struct
-{
-#ifdef __linux__
-    NvS32 sessionIndex; // IN
-#endif
-    NvU64 baseAddress NV_ALIGN_BYTES(8); // IN
-    NvU64 sizeInBytes NV_ALIGN_BYTES(8); // IN
-    NvU32 accessType; // IN (UvmDebugAccessType)
-    NvU64 buffer NV_ALIGN_BYTES(8); // IN/OUT
-    NvBool isBitmaskSet; // OUT
-    NvU64 bitmask NV_ALIGN_BYTES(8); // IN/OUT
-    NV_STATUS rmStatus; // OUT
-} UVM_DEBUG_ACCESS_MEMORY_PARAMS;
-
 //
 // UvmRegisterGpu
 //
@@ -1009,20 +990,35 @@ typedef struct
 //
 #define UVM_POPULATE_PAGEABLE UVM_IOCTL_BASE(71)
 
-// Allow population of managed ranges.
-//
-// The UVM driver must have builtin tests enabled for the API to use the
-// following two flags.
+// Allow population of managed ranges. The goal is to validate that it is
+// possible to populate pageable ranges backed by VMAs with the VM_MIXEDMAP or
+// VM_DONTEXPAND special flags set. But since there is no portable way to force
+// allocation of such memory from user space, and it is not safe to change the
+// flags of an already-created VMA from kernel space, we take advantage of the
+// fact that managed ranges have both special flags set at creation time (see
+// uvm_mmap).
 #define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED 0x00000001
 
 // By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
 // does not have read permission. This flag skips that check.
 #define UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK 0x00000002
 
-#define UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
-                                              UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)
+// By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
+// is VM_IO or VM_PFNMAP. This flag skips that check.
+#define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL 0x00000004
+
+// These flags are used internally within the driver and are not allowed from
+// user space.
+#define UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL
+
+// These flags are allowed from user space only when builtin tests are enabled.
+// Some of them may also be used internally within the driver in non-test use
+// cases.
+#define UVM_POPULATE_PAGEABLE_FLAGS_TEST (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
+                                          UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)
 
-#define UVM_POPULATE_PAGEABLE_FLAGS_ALL UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL
+#define UVM_POPULATE_PAGEABLE_FLAGS_ALL (UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL | \
+                                         UVM_POPULATE_PAGEABLE_FLAGS_TEST)
 
 typedef struct
 {
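The three masks above imply a layered validation at the ioctl boundary: unknown bits fail outright, internal bits never come from user space, and test bits require a builtin-tests driver build. A hypothetical sketch of that check; populate_pageable_flags_ok() is an illustrative helper, not the driver's actual entry-point code:

static int populate_pageable_flags_ok(unsigned flags, int builtin_tests_enabled)
{
    if (flags & ~UVM_POPULATE_PAGEABLE_FLAGS_ALL)
        return 0;  // undefined flag bits
    if (flags & UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL)
        return 0;  // ALLOW_SPECIAL is kernel-internal only
    if ((flags & UVM_POPULATE_PAGEABLE_FLAGS_TEST) && !builtin_tests_enabled)
        return 0;  // test-only flags need a test-enabled build
    return 1;
}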
@@ -1142,7 +1138,6 @@ typedef struct
     NV_STATUS rmStatus; // OUT
 } UVM_IS_8_SUPPORTED_PARAMS;
 
-
 #ifdef __cplusplus
 }
 #endif
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2020 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
         return;
 
     if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
-               atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
-               uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
+        UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
+                       atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
+                       uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
                            " insmod with uvm_leak_checker=2 for detailed information." :
                            "");
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 
         if (g_uvm_global.unload_state.ptr)
             *g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
         uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
             uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
 
-            printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "    Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
-                   uvm_kvsize((void *)((uintptr_t)info->node.key)),
-                   kbasename(info->file),
-                   info->line,
-                   info->function,
-                   info->node.key);
+            UVM_INFO_PRINT("    Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
+                           uvm_kvsize((void *)((uintptr_t)info->node.key)),
+                           kbasename(info->file),
+                           info->line,
+                           info->function,
+                           info->node.key);
 
             // Free so we don't keep eating up memory while debugging. Note that
             // this also removes the entry from the table, frees info, and drops
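These two hunks swap raw printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX ...) calls for the driver's UVM_INFO_PRINT() wrapper. The wrapper's real definition is not shown in this diff; one plausible minimal shape, for orientation only (the actual macro likely also records function context):

// Illustrative only: a prefix-adding printk wrapper in the spirit of
// UVM_INFO_PRINT(); names here are examples, not the driver's macros.
#define EXAMPLE_UVM_PREFIX "nvidia-uvm: "
#define EXAMPLE_INFO_PRINT(fmt, ...) \
    printk(KERN_INFO EXAMPLE_UVM_PREFIX fmt, ##__VA_ARGS__)

Centralizing the prefix and log level in one macro keeps the call sites short and makes a later change of severity or format a one-line edit.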
@@ -76,14 +76,16 @@
 #include <linux/ratelimit.h>
 #endif
 
-#if defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT)
-#include <asm/powernv.h>
-#endif
-
 #if defined(NV_LINUX_SCHED_TASK_STACK_H_PRESENT)
 #include <linux/sched/task_stack.h>
 #endif
 
+#include "linux/bitmap.h"
+#include "linux/bitops.h"
+#include "linux/gfp.h"
+#include "linux/pagemap.h"
+#include "linux/types.h"
+
 #if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
 #include <linux/scatterlist.h>
 #endif
@@ -183,7 +185,7 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
         printk(fmt, ##__VA_ARGS__); \
     } while (0)
 
-#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
+#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)
 
 #if defined(NVCPU_X86)
 /* Some old IA32 kernels don't have 64/64 division routines,
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -27,12 +27,13 @@
 
 const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
 {
-    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
+    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
 
     switch (lock_order) {
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
+        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -69,6 +69,17 @@
 //
 // This should be taken whenever global GPU state might need to be modified.
 //
+// - Access counters VA space enablement state lock
+//   Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
+//   Exclusive lock (mutex)
+//
+//   This protects VA space state associated with access counters enablement.
+//   Blackwell+ GPUs may have multiple access counters notification buffers
+//   and their "atomic" enablement is protected by this lock.
+//
+//   This should be taken whenever VA space access counters state might need
+//   to be modified.
+//
 // - GPU ISR lock
 //   Order: UVM_LOCK_ORDER_ISR
 //   Exclusive lock (mutex) per gpu
@@ -487,6 +498,7 @@ typedef enum
     UVM_LOCK_ORDER_INVALID = 0,
     UVM_LOCK_ORDER_GLOBAL_PM,
     UVM_LOCK_ORDER_GLOBAL,
+    UVM_LOCK_ORDER_ACCESS_COUNTERS,
     UVM_LOCK_ORDER_ISR,
     UVM_LOCK_ORDER_MMAP_LOCK,
    UVM_LOCK_ORDER_VA_SPACES_LIST,
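A new level in this enum is consumed by initializing a mutex with its order; UVM's debug builds then check that locks are only taken in increasing order. An illustrative sketch: uvm_mutex_init()/uvm_mutex_lock()/uvm_mutex_unlock() are real UVM helpers, but this global and call site are hypothetical.

static uvm_mutex_t g_access_counters_enablement_lock;

static void example_init(void)
{
    uvm_mutex_init(&g_access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
}

static void example_enable(void)
{
    // Per the enum above, this must be taken after UVM_LOCK_ORDER_GLOBAL
    // but before UVM_LOCK_ORDER_ISR.
    uvm_mutex_lock(&g_access_counters_enablement_lock);
    // ... atomically toggle enablement across all notification buffers ...
    uvm_mutex_unlock(&g_access_counters_enablement_lock);
}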
@@ -742,7 +754,8 @@ bool __uvm_locking_initialized(void);
         ret; \
     })
 
-// Helper for calling a UVM-RM interface function that returns void with lock recording
+// Helper for calling a UVM-RM interface function that returns void with lock
+// recording
 #define uvm_rm_locked_call_void(call) ({ \
     uvm_record_lock_rm_all(); \
     call; \
@@ -947,13 +947,15 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_external_t *ext
         goto error;
     }
 
-    // Check for the maximum page size for the mapping of vidmem allocations,
-    // the vMMU segment size may limit the range of page sizes.
-    biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
-                                                                mapping_gpu->mem_info.max_vidmem_page_size);
-    if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
-        (mapping_page_size > biggest_mapping_page_size))
-        mapping_page_size = biggest_mapping_page_size;
+    if (mapping_gpu->mem_info.size) {
+        // Check for the maximum page size for the mapping of vidmem
+        // allocations, the vMMU segment size may limit the range of page sizes.
+        biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
+                                                                    mapping_gpu->mem_info.max_vidmem_page_size);
+        if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
+            (mapping_page_size > biggest_mapping_page_size))
+            mapping_page_size = biggest_mapping_page_size;
+    }
 
     mem_info.pageSize = mapping_page_size;
 
@@ -61,10 +61,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->non_replayable_faults_supported = false;
 
-    parent_gpu->access_counters_supported = false;
-
-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
     parent_gpu->fault_cancel_va_supported = false;
 
     parent_gpu->scoped_atomics_supported = false;
 
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2021-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -24,25 +24,29 @@
 #include "uvm_gpu.h"
 #include "uvm_hal.h"
 
-void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
+void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
 {
     UVM_ASSERT_MSG(false,
-                   "enable_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
-void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
+void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
 {
     UVM_ASSERT_MSG(false,
-                   "disable_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
-void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
+void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                    NvU32 get)
 {
     UVM_ASSERT_MSG(false,
-                   "clear_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
 NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
@@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
     return 0;
 }
 
-bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
+bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                      NvU32 index)
 {
     UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
     return false;
 }
 
-void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
+void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                         NvU32 index)
 {
     UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
-void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
                                                                    NvU32 index,
                                                                    uvm_access_counter_buffer_entry_t *buffer_entry)
 {
     UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
Some files were not shown because too many files have changed in this diff.