580.65.06

Maneet Singh
2025-08-04 11:15:02 -07:00
parent d890313300
commit 307159f262
1315 changed files with 477791 additions and 279973 deletions

View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 575.64.05.
version 580.65.06.
## How to Build
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
575.64.05 driver release. This can be achieved by installing
580.65.06 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/575.64.05/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/580.65.06/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.

View File

@@ -8,7 +8,7 @@
# NV_KERNEL_SOURCES : The root of the kernel source tree.
# NV_KERNEL_OUTPUT : The kernel's output tree.
# NV_KERNEL_MODULES : A whitespace-separated list of modules to build.
# ARCH : The target CPU architecture: x86_64|arm64|powerpc
# ARCH : The target CPU architecture: x86_64|arm64
#
# Kbuild provides the variables:
#
@@ -79,12 +79,22 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"575.64.05\"
ccflags-y += -DNV_VERSION_STRING=\"580.65.06\"
# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
SYSSRCNVOOT ?= /usr/src/nvidia/nvidia-oot
endif
ifneq ($(SYSSRCHOST1X),)
ccflags-y += -I$(SYSSRCHOST1X)
endif
ifneq ($(SYSSRCNVOOT),)
ccflags-y += -I$(SYSSRCNVOOT)/include
KBUILD_EXTRA_SYMBOLS = $(SYSSRCNVOOT)/Module.symvers
endif
# Some Android kernels prohibit driver use of filesystem functions like
# filp_open() and kernel_read(). Disable the NV_FILESYSTEM_ACCESS_AVAILABLE
# functionality that uses those functions when building for Android.
@@ -124,10 +134,6 @@ ifeq ($(ARCH),x86_64)
ccflags-y += -mno-red-zone -mcmodel=kernel
endif
ifeq ($(ARCH),powerpc)
ccflags-y += -mlittle-endian -mno-strict-align
endif
ccflags-y += -DNV_UVM_ENABLE
ccflags-y += $(call cc-option,-Werror=undef,)
ccflags-y += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)

View File

@@ -99,9 +99,7 @@ else
ifndef ARCH
ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \
-e 's/armv[0-7]\w\+/arm/' \
-e 's/aarch64/arm64/' \
-e 's/ppc64le/powerpc/' \
-e 's/riscv64/riscv/' \
)
endif
@@ -111,7 +109,7 @@ else
ifneq ($(filter $(ARCH),i386 x86_64),)
KERNEL_ARCH = x86
else
ifeq ($(filter $(ARCH),arm64 powerpc riscv),)
ifeq ($(filter $(ARCH),arm64 riscv),)
$(error Unsupported architecture $(ARCH))
endif
endif

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -49,6 +49,8 @@ typedef enum
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
NV_FIRMWARE_CHIP_FAMILY_GB20X = 9,
NV_FIRMWARE_CHIP_FAMILY_GB10Y = 11,
NV_FIRMWARE_CHIP_FAMILY_GB20Y = 12,
NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;
@@ -58,6 +60,8 @@ static inline const char *nv_firmware_chip_family_to_string(
{
switch (fw_chip_family) {
case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
case NV_FIRMWARE_CHIP_FAMILY_GB10Y: return "gb10y";
case NV_FIRMWARE_CHIP_FAMILY_GB20Y: return "gb20y";
case NV_FIRMWARE_CHIP_FAMILY_GB20X: return "gb20x";
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
@@ -68,9 +72,9 @@ static inline const char *nv_firmware_chip_family_to_string(
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return NULL;
return "";
}
return NULL;
return "";
}
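A minimal usage sketch, not part of this commit, of the NULL-to-empty-string change above: callers can now pass the result straight to a format string without a NULL check. The function name nv_example_log_fw_family and the log text are illustrative assumptions.

#include <linux/printk.h>

static void nv_example_log_fw_family(nv_firmware_chip_family_t family)
{
    const char *name = nv_firmware_chip_family_to_string(family);

    /* An unrecognized family now yields "" rather than NULL, so this
     * format never dereferences a NULL pointer. */
    printk(KERN_INFO "NVRM: GSP firmware chip family: %s\n", name);
}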
// The includer may optionally define
@@ -89,6 +93,8 @@ static inline const char *nv_firmware_for_chip_family(
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB10Y: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20Y: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
@@ -110,6 +116,8 @@ static inline const char *nv_firmware_for_chip_family(
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB10Y: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20Y: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through

View File

@@ -29,17 +29,9 @@
#include <linux/kernel.h>
#include <linux/hash.h>
#if defined(NV_LINUX_STRINGHASH_H_PRESENT)
#include <linux/stringhash.h> /* full_name_hash() */
#else
#include <linux/dcache.h>
#endif
#if (NV_FULL_NAME_HASH_ARGUMENT_COUNT == 3)
#define nv_string_hash(_str) full_name_hash(NULL, _str, strlen(_str))
#else
#define nv_string_hash(_str) full_name_hash(_str, strlen(_str))
#endif
/**
* This naive hashtable was introduced by commit d9b482c8ba19 (v3.7, 2012-10-31).
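For orientation only (this is not code from the commit): a generic sketch of how a string key can feed the kernel hashtable API via full_name_hash(); the entry type and table name are illustrative.

#include <linux/hashtable.h>
#include <linux/stringhash.h>   /* full_name_hash() */
#include <linux/string.h>

struct nv_example_entry {
    const char *name;
    struct hlist_node node;
};

static DEFINE_HASHTABLE(nv_example_table, 4);   /* 2^4 = 16 buckets */

static struct nv_example_entry *nv_example_lookup(const char *name)
{
    struct nv_example_entry *e;
    unsigned int key = full_name_hash(NULL, name, strlen(name));

    /* Walk only the bucket that the key hashes to. */
    hash_for_each_possible(nv_example_table, e, node, key) {
        if (strcmp(e->name, name) == 0)
            return e;
    }
    return NULL;
}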

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -140,6 +140,7 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
NvU32 index;
NvU64 totalSize NV_ALIGN_BYTES(8);
NvU8 mappingType;
NvBool bAllowMmap;
NvHandle handles[NV_DMABUF_EXPORT_MAX_HANDLES];
NvU64 offsets[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
NvU64 sizes[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);

View File

@@ -57,9 +57,7 @@
#include <linux/version.h>
#include <linux/utsname.h>
#if LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0)
// Version 4.4 is allowed, temporarily, although not officially supported.
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
#error "This driver does not support kernels older than Linux 4.15!"
#endif
@@ -78,16 +76,6 @@
#include <linux/mm.h>
#if !defined(VM_RESERVED)
#define VM_RESERVED 0x00000000
#endif
#if !defined(VM_DONTEXPAND)
#define VM_DONTEXPAND 0x00000000
#endif
#if !defined(VM_DONTDUMP)
#define VM_DONTDUMP 0x00000000
#endif
#include <linux/init.h> /* module_init, module_exit */
#include <linux/types.h> /* pid_t, size_t, __u32, etc */
#include <linux/errno.h> /* error codes */
@@ -115,38 +103,20 @@
#endif
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRM_DEVICE_H_PRESENT)
#include <drm/drm_device.h>
#endif
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
#endif /* NV_DRM_AVAILABLE */
/*
* sched.h was refactored with this commit (as part of Linux 4.11)
* 2017-03-03 1827adb11ad26b2290dc9fe2aaf54976b2439865
*/
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
#include <linux/sched/signal.h> /* task_lock(), task_unlock() */
#endif
#if defined(NV_LINUX_SCHED_TASK_H_PRESENT)
#include <linux/sched/task.h> /* task_lock(), task_unlock() */
#endif
/* task and signal-related items, for kernels < 4.11: */
#include <linux/sched.h> /* task_lock(), task_unlock() */
/* task and signal-related items */
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/sched.h>
#include <linux/moduleparam.h> /* module_param() */
#include <asm/tlbflush.h> /* flush_tlb(), flush_tlb_all() */
@@ -169,10 +139,7 @@
#include <asm/page.h> /* PAGE_OFFSET */
#include <asm/pgtable.h> /* pte bit definitions */
#include <asm/bitops.h> /* __set_bit() */
#if defined(NV_LINUX_TIME_H_PRESENT)
#include <linux/time.h> /* FD_SET() */
#endif
#include "nv-list-helpers.h"
@@ -211,11 +178,7 @@
#include <linux/workqueue.h> /* workqueue */
#include "nv-kthread-q.h" /* kthread based queue */
#if defined(NV_LINUX_EFI_H_PRESENT)
#include <linux/efi.h> /* efi_enabled */
#endif
#include <linux/fb.h> /* fb_info struct */
#include <linux/screen_info.h> /* screen_info */
@@ -315,65 +278,11 @@ extern int nv_pat_mode;
#define NV_CONFIG_PREEMPT_RT 1
#endif
#if defined(NV_WRITE_CR4_PRESENT)
#define NV_READ_CR4() read_cr4()
#define NV_WRITE_CR4(cr4) write_cr4(cr4)
#else
#define NV_READ_CR4() __read_cr4()
#define NV_WRITE_CR4(cr4) __write_cr4(cr4)
#endif
#ifndef get_cpu
#define get_cpu() smp_processor_id()
#define put_cpu()
#endif
#if !defined(unregister_hotcpu_notifier)
#define unregister_hotcpu_notifier unregister_cpu_notifier
#endif
#if !defined(register_hotcpu_notifier)
#define register_hotcpu_notifier register_cpu_notifier
#endif
#if defined(NVCPU_X86_64)
#if !defined(pmd_large)
#define pmd_large(_pmd) \
((pmd_val(_pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
#endif
#endif /* defined(NVCPU_X86_64) */
#define NV_PAGE_COUNT(page) \
((unsigned int)page_count(page))
#define NV_GET_PAGE_FLAGS(page_ptr) \
(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
/*
* Before the introduction of VM_PFNMAP, there was a VM_UNPAGED flag.
* Drivers which wanted to call remap_pfn_range on normal pages had to use this
* VM_UNPAGED flag *and* set PageReserved. With the introduction of VM_PFNMAP,
* that restriction went away. This is described in commit
*
* 2005-10-28 6aab341e0a28aff100a09831c5300a2994b8b986
* ("mm: re-architect the VM_UNPAGED logic")
*
* , which added VM_PFNMAP and vm_normal_page. Therefore, if VM_PFNMAP is
* defined, then we do *not* need to mark a page as reserved, in order to
* call remap_pfn_range().
*/
#if !defined(VM_PFNMAP)
#define NV_MAYBE_RESERVE_PAGE(ptr_ptr) \
SetPageReserved(NV_GET_PAGE_STRUCT(page_ptr->phys_addr))
#define NV_MAYBE_UNRESERVE_PAGE(page_ptr) \
ClearPageReserved(NV_GET_PAGE_STRUCT(page_ptr->phys_addr))
#else
#define NV_MAYBE_RESERVE_PAGE(ptr_ptr)
#define NV_MAYBE_UNRESERVE_PAGE(page_ptr)
#endif /* defined(VM_PFNMAP) */
#if !defined(__GFP_COMP)
#define __GFP_COMP 0
#endif
#if !defined(DEBUG) && defined(__GFP_NOWARN)
#define NV_GFP_KERNEL (GFP_KERNEL | __GFP_NOWARN)
#define NV_GFP_ATOMIC (GFP_ATOMIC | __GFP_NOWARN)
@@ -394,14 +303,6 @@ extern int nv_pat_mode;
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
#endif
typedef enum
{
NV_MEMORY_TYPE_SYSTEM, /* Memory mapped for ROM, SBIOS and physical RAM. */
NV_MEMORY_TYPE_REGISTERS,
NV_MEMORY_TYPE_FRAMEBUFFER,
NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
} nv_memory_type_t;
#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
#define NV_ALLOW_WRITE_COMBINING(mt) 1
#elif defined(NVCPU_X86_64)
@@ -414,10 +315,6 @@ typedef enum
#endif
#endif
#if !defined(IRQF_SHARED)
#define IRQF_SHARED SA_SHIRQ
#endif
#define NV_MAX_RECURRING_WARNING_MESSAGES 10
/* various memory tracking/debugging techniques
@@ -432,6 +329,25 @@ typedef enum
#define NV_DBG_MEMINFO NV_DBG_INFO
#endif
// Provides a consistent way for the driver to obtain the maximum page order
// Starting with Linux kernel 6.8, MAX_ORDER is renamed to MAX_PAGE_ORDER.
#if defined(MAX_PAGE_ORDER)
#define NV_MAX_PAGE_ORDER MAX_PAGE_ORDER
#else
// Linux kernel 6.4.0 changed the meaning of the MAX_ORDER define.
// Prior to 6.4.0, MAX_ORDER was defined as the number of orders available -
// By default defined at 11, it signals that values between 0 and 10 (inclusive)
// are valid order values that the Linux buddy allocator supports.
//
// Starting with 6.4.0, MAX_ORDER is redefined as the maximum valid order value.
// By default defined at 10, it signals that order == 10 is the maximum valid
// order value that the Linux buddy allocator supports.
//
// To smooth interfacing, define NV_MAX_PAGE_ORDER conservatively, even though
// this may cause RM to report a maximum order value smaller than the true maximum.
#define NV_MAX_PAGE_ORDER (MAX_ORDER - 1)
#endif // defined(MAX_PAGE_ORDER)
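As a rough illustration (not taken from the commit), a caller might clamp a requested order against NV_MAX_PAGE_ORDER before asking the buddy allocator; the helper name and GFP flags are assumptions.

#include <linux/gfp.h>

static struct page *nv_example_alloc_order(unsigned int order)
{
    /* Never request more than the buddy allocator supports. */
    if (order > NV_MAX_PAGE_ORDER)
        order = NV_MAX_PAGE_ORDER;

    return alloc_pages(GFP_KERNEL | __GFP_NOWARN, order);
}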
#define NV_MEM_TRACKING_PAD_SIZE(size) \
(size) = NV_ALIGN_UP((size + sizeof(void *)), sizeof(void *))
@@ -597,11 +513,7 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot));
#if defined(pgprot_decrypted)
return pgprot_decrypted(prot);
#else
return nv_sme_clr(prot);
#endif // pgprot_decrypted
}
#if defined(PAGE_KERNEL_NOENC)
@@ -616,20 +528,12 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
#endif
#endif
#if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
#define NV_NUM_PHYSPAGES get_num_physpages()
#else
#define NV_NUM_PHYSPAGES num_physpages
#endif
#define NV_GET_CURRENT_PROCESS() current->tgid
#define NV_IN_ATOMIC() in_atomic()
#define NV_LOCAL_BH_DISABLE() local_bh_disable()
#define NV_LOCAL_BH_ENABLE() local_bh_enable()
#define NV_COPY_TO_USER(to, from, n) copy_to_user(to, from, n)
#define NV_COPY_FROM_USER(to, from, n) copy_from_user(to, from, n)
#define NV_IS_SUSER() capable(CAP_SYS_ADMIN)
#define NV_PCI_DEVICE_NAME(pci_dev) ((pci_dev)->pretty_name)
#define NV_CLI() local_irq_disable()
#define NV_SAVE_FLAGS(eflags) local_save_flags(eflags)
#define NV_RESTORE_FLAGS(eflags) local_irq_restore(eflags)
@@ -781,29 +685,9 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
} \
__dev; \
})
#elif defined(NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT)
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain,bus, devfn) \
pci_get_domain_bus_and_slot(domain, bus, devfn)
#else
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain,bus, devfn) \
({ \
struct pci_dev *__dev = NULL; \
while ((__dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, \
__dev)) != NULL) \
{ \
if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) && \
(NV_PCI_BUS_NUMBER(__dev) == bus) && \
(NV_PCI_DEVFN(__dev) == devfn)) \
{ \
break; \
} \
} \
__dev; \
})
#endif
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
pci_get_domain_bus_and_slot(domain, bus, devfn)
#endif
#define NV_PRINT_AT(nv_debug_level,at) \
@@ -827,17 +711,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
# define minor(x) MINOR(x)
#endif
#if defined(cpu_relax)
#define NV_CPU_RELAX() cpu_relax()
#else
#define NV_CPU_RELAX() barrier()
#endif
#ifndef IRQ_RETVAL
typedef void irqreturn_t;
#define IRQ_RETVAL(a)
#endif
#if !defined(PCI_COMMAND_SERR)
#define PCI_COMMAND_SERR 0x100
#endif
@@ -892,13 +765,8 @@ static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
return vmf_insert_pfn_prot(vma, virt_addr, pfn,
__pgprot(pgprot_val(vma->vm_page_prot)));
#else
int ret = -EINVAL;
#if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
int ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
__pgprot(pgprot_val(vma->vm_page_prot)));
#else
ret = vm_insert_pfn(vma, virt_addr, pfn);
#endif
switch (ret)
{
case 0:
@@ -913,8 +781,8 @@ static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
default:
break;
}
#endif /* defined(NV_VMF_INSERT_PFN_PROT_PRESENT) */
return VM_FAULT_SIGBUS;
#endif /* defined(NV_VMF_INSERT_PFN_PROT_PRESENT) */
}
/* Converts BAR index to Linux specific PCI BAR index */
@@ -970,7 +838,7 @@ extern void *nvidia_stack_t_cache;
* wait for the timestamp to increment by at least one to ensure that we do
* not hit a name conflict in cache create -> destroy (async) -> create cycle.
*/
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
#if !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
static inline void nv_kmem_ctor_dummy(void *arg)
{
(void)arg;
@@ -998,7 +866,7 @@ static inline void nv_kmem_ctor_dummy(void *arg)
static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
#if !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
/*
* We cannot call kmem_cache_zalloc directly as it adds the __GFP_ZERO
* flag. This flag together with the presence of a slab constructor is
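A minimal sketch of the workaround described in the comment above, assuming the driver zeroes objects manually instead of relying on __GFP_ZERO; this is illustrative, not the driver's exact implementation.

#include <linux/slab.h>
#include <linux/string.h>

static inline void *nv_example_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
    /* Allocate without __GFP_ZERO (which conflicts with a slab constructor)
     * and zero the object explicitly instead. */
    void *p = kmem_cache_alloc(k, flags);

    if (p != NULL)
        memset(p, 0, kmem_cache_size(k));
    return p;
}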
@@ -1091,6 +959,7 @@ struct nv_dma_buf
struct dma_buf *dma_buf;
struct dma_buf_attachment *dma_attach;
struct sg_table *sgt;
enum dma_data_direction direction;
};
#endif // CONFIG_DMA_SHARED_BUFFER
@@ -1115,7 +984,7 @@ typedef struct nv_alloc_s {
unsigned int num_pages;
unsigned int order;
unsigned int size;
nvidia_pte_t **page_table; /* list of physical pages allocated */
nvidia_pte_t *page_table; /* array of physical pages allocated */
unsigned int pid;
struct page **user_pages;
NvU64 guest_id; /* id of guest VM */
@@ -1160,14 +1029,6 @@ nv_dma_maps_swiotlb(struct device *dev)
{
NvBool swiotlb_in_use = NV_FALSE;
#if defined(CONFIG_SWIOTLB)
#if defined(NV_DMA_OPS_PRESENT) || defined(NV_GET_DMA_OPS_PRESENT) || \
defined(NV_SWIOTLB_DMA_OPS_PRESENT)
/*
* We only use the 'dma_ops' symbol on older x86_64 kernels; later kernels,
* including those for other architectures, have converged on the
* get_dma_ops() interface.
*/
#if defined(NV_GET_DMA_OPS_PRESENT)
/*
* The __attribute__ ((unused)) is necessary because in at least one
* case, *none* of the preprocessor branches below are taken, and
@@ -1176,20 +1037,12 @@ nv_dma_maps_swiotlb(struct device *dev)
* case.
*/
const struct dma_map_ops *ops __attribute__ ((unused)) = get_dma_ops(dev);
#else
const struct dma_mapping_ops *ops __attribute__ ((unused)) = dma_ops;
#endif
/*
* The switch from dma_mapping_ops -> dma_map_ops coincided with the
* switch from swiotlb_map_sg -> swiotlb_map_sg_attrs.
*/
#if defined(NVCPU_AARCH64) && \
defined(NV_NONCOHERENT_SWIOTLB_DMA_OPS_PRESENT)
/* AArch64 exports these symbols directly */
swiotlb_in_use = ((ops == &noncoherent_swiotlb_dma_ops) ||
(ops == &coherent_swiotlb_dma_ops));
#elif NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs != 0
#if NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs != 0
swiotlb_in_use = (ops->map_sg == swiotlb_map_sg_attrs);
#elif NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_dma_ops != 0
swiotlb_in_use = (ops == &swiotlb_dma_ops);
@@ -1200,9 +1053,8 @@ nv_dma_maps_swiotlb(struct device *dev)
* NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_dma_ops == 0) does
* nothing, and ends up dropping us out to the last line of this function,
* effectively returning false. The nearly-human-readable version of that
* case is "struct swiotlb_dma_ops is present (NV_SWIOTLB_DMA_OPS_PRESENT
* is defined) but neither swiotlb_map_sg_attrs nor swiotlb_dma_ops is
* present".
* case is "get_dma_ops() is defined, but neither swiotlb_map_sg_attrs
* nor swiotlb_dma_ops is present".
*
* That can happen on kernels that fall within below range:
*
@@ -1226,7 +1078,6 @@ nv_dma_maps_swiotlb(struct device *dev)
* we just return NV_FALSE and in nv_compute_gfp_mask() we check for
* whether swiotlb could possibly be used (outside of swiotlb=force).
*/
#endif
/*
* Commit 2017-11-07 d7b417fa08d ("x86/mm: Add DMA support for
@@ -1348,6 +1199,15 @@ struct os_wait_queue {
struct completion q;
};
#define MAX_CLIENTS_PER_ADAPTER 127
#define MAX_TEGRA_I2C_PORTS 16
typedef struct nv_i2c_client_entry_s
{
NvU32 port;
void *pOsClient[MAX_CLIENTS_PER_ADAPTER];
} nv_i2c_client_entry_t;
/*!
* @brief Mapping between clock names and clock handles.
*
@@ -1421,6 +1281,9 @@ typedef struct
} nv_acpi_t;
#endif
struct nv_pci_tegra_devfreq_data;
struct nv_pci_tegra_devfreq_dev;
/* linux-specific version of old nv_state_t */
/* this is a general os-specific state structure. the first element *must* be
the general state structure, for the generic unix-based code */
@@ -1524,6 +1387,30 @@ typedef struct nv_linux_state_s {
nv_acpi_t* nv_acpi_object;
#endif
nv_i2c_client_entry_t i2c_clients[MAX_TEGRA_I2C_PORTS];
struct reset_control *dpaux0_reset;
struct reset_control *nvdisplay_reset;
struct reset_control *dsi_core_reset;
struct reset_control *mipi_cal_reset;
struct reset_control *hdacodec_reset;
/*
* nv_imp_icc_path represents the interconnect path across which display
* data must travel.
*/
struct icc_path *nv_imp_icc_path;
#if defined(NV_DEVM_ICC_GET_PRESENT)
/*
* is_upstream_icc_path tracks whether we are using upstream ICC. This
* is required until we fully migrate to upstream ICC where it is
* available. Right now, even if upstream ICC is available, we still
* use downstream ICC mechanisms for T23x.
*/
NvBool is_upstream_icc_path;
#endif
nvsoc_clks_t soc_clk_handles;
/* Lock serializing ISRs for different SOC vectors */
@@ -1568,6 +1455,18 @@ typedef struct nv_linux_state_s {
wait_queue_head_t wait;
NvS32 return_status;
#endif
#if defined(CONFIG_PM_DEVFREQ)
const struct nv_pci_tegra_devfreq_data *devfreq_table;
unsigned int devfreq_table_size;
struct nv_pci_tegra_devfreq_dev *gpc_devfreq_dev;
struct nv_pci_tegra_devfreq_dev *nvd_devfreq_dev;
struct nv_pci_tegra_devfreq_dev *sys_devfreq_dev;
struct nv_pci_tegra_devfreq_dev *pwr_devfreq_dev;
int (*devfreq_suspend)(struct device *dev);
int (*devfreq_resume)(struct device *dev);
#endif
} nv_linux_state_t;
extern nv_linux_state_t *nv_linux_devices;
@@ -1678,7 +1577,7 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
char *name_unique;
struct kmem_cache *cache;
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
#if !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
size_t len;
NvU64 tm_ns = nv_ktime_get_raw_ns();
@@ -1735,6 +1634,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_RegisterPlatformDeviceDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 NVreg_EnableNonblockingOpen;
@@ -1777,32 +1677,11 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
return NV_FALSE;
}
/*
* RB_EMPTY_ROOT was added in 2.6.18 by this commit:
* 2006-06-21 dd67d051529387f6e44d22d1d5540ef281965fdd
*/
#if !defined(RB_EMPTY_ROOT)
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
#endif
// Default flags for ISRs
static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
{
NvU32 flags = 0;
/*
* Request IRQs to be disabled in our ISRs to keep consistency across the
* supported kernel versions.
*
* IRQF_DISABLED has been made the default in 2.6.35 with commit e58aa3d2d0cc
* from March 2010. And it has been later completely removed in 4.1 with commit
* d8bf368d0631 from March 2015. Add it to our flags if it's defined to get the
* same behaviour on pre-2.6.35 kernels as on recent ones.
*/
#if defined(IRQF_DISABLED)
flags |= IRQF_DISABLED;
#endif
/*
* For legacy interrupts, also allow sharing. Sharing doesn't make sense
* for MSI(-X) as on Linux they are never shared across different devices
@@ -1814,29 +1693,14 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
return flags;
}
/*
* From v3.7-rc1 kernel have stopped exporting get_unused_fd() and started
* exporting get_unused_fd_flags(), as of this commit:
* 2012-09-26 1a7bd2265fc ("make get_unused_fd_flags() a function")
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_get_unused_fd
#define NV_GET_UNUSED_FD() get_unused_fd()
#else
#define NV_GET_UNUSED_FD() get_unused_fd_flags(0)
#endif
#if NV_IS_EXPORT_SYMBOL_PRESENT_get_unused_fd_flags
#define NV_GET_UNUSED_FD_FLAGS(flags) get_unused_fd_flags(flags)
#else
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
#endif
#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
NV_STATUS nv_imp_icc_get(nv_state_t *nv);
void nv_imp_icc_put(nv_state_t *nv);
static inline void nv_mutex_destroy(struct mutex *lock)
{
@@ -1886,53 +1750,22 @@ typedef enum
NV_NUMA_STATUS_COUNT
} nv_numa_status_t;
#if defined(NV_LINUX_PLATFORM_DEVICE_H_PRESENT)
#include <linux/platform_device.h>
#endif
#if defined(NV_LINUX_MUTEX_H_PRESENT)
#include <linux/mutex.h>
#endif
#if defined(NV_LINUX_RESET_H_PRESENT)
#include <linux/reset.h>
#endif
#if defined(NV_LINUX_DMA_BUF_H_PRESENT)
#include <linux/dma-buf.h>
#endif
#if defined(NV_LINUX_GPIO_H_PRESENT)
#include <linux/gpio.h>
#endif
#if defined(NV_LINUX_OF_GPIO_H_PRESENT)
#include <linux/of_gpio.h>
#endif
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif
#if defined(NV_LINUX_OF_PLATFORM_H_PRESENT)
#include <linux/of_platform.h>
#endif
#if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
#include <linux/interconnect.h>
#endif
#if defined(NV_LINUX_PM_RUNTIME_H_PRESENT)
#include <linux/pm_runtime.h>
#endif
#if defined(NV_LINUX_CLK_H_PRESENT)
#include <linux/clk.h>
#endif
#if defined(NV_LINUX_CLK_PROVIDER_H_PRESENT)
#include <linux/clk-provider.h>
#endif
#define NV_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL_GPL(symbol)
#define NV_CHECK_EXPORT_SYMBOL(symbol) NV_IS_EXPORT_SYMBOL_PRESENT_##symbol

View File

@@ -28,12 +28,9 @@
#include <linux/spinlock.h>
#include <linux/rwsem.h>
#include <linux/sched.h> /* signal_pending, cond_resched */
#include <linux/sched.h> /* cond_resched */
#include <linux/semaphore.h>
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
#include <linux/sched/signal.h> /* signal_pending for kernels >= 4.11 */
#endif
#include <linux/sched/signal.h> /* signal_pending */
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
typedef raw_spinlock_t nv_spinlock_t;

View File

@@ -66,54 +66,17 @@ typedef int vm_fault_t;
/*
* get_user_pages()
*
* The 8-argument version of get_user_pages() was deprecated by commit
* cde70140fed8 ("mm/gup: Overload get_user_pages() functions") in v4.6-rc1.
* (calling get_user_pages with current and current->mm).
*
* Completely moved to the 6 argument version of get_user_pages() by
* commit c12d2da56d0e ("mm/gup: Remove the macro overload API migration
* helpers from the get_user*() APIs") in v4.6-rc4.
*
* write and force parameters were replaced with gup_flags by
* commit 768ae309a961 ("mm: replace get_user_pages() write/force parameters
* with gup_flags") in v4.9.
*
* A 7-argument version of get_user_pages was introduced into linux-4.4.y by
* commit 8e50b8b07f462 ("mm: replace get_user_pages() write/force parameters
* with gup_flags") which cherry-picked the replacement of the write and
* force parameters with gup_flags.
*
* Removed vmas parameter from get_user_pages() by commit 54d020692b34
* ("mm/gup: remove unused vmas parameter from get_user_pages()") in v6.5.
*
*/
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
#if !defined(NV_GET_USER_PAGES_HAS_VMAS_ARG)
#define NV_GET_USER_PAGES get_user_pages
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
#else
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages) \
get_user_pages(start, nr_pages, flags, pages, NULL)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS_VMAS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages) \
get_user_pages(current, current->mm, start, nr_pages, flags, pages, NULL)
#else
static inline long NV_GET_USER_PAGES(unsigned long start,
unsigned long nr_pages,
unsigned int flags,
struct page **pages)
{
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS)
return get_user_pages(start, nr_pages, write, force, pages, NULL);
#else
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
return get_user_pages(current, current->mm, start, nr_pages, write,
force, pages, NULL);
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS
}
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
#endif
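A usage sketch under stated assumptions (not part of the commit): pin a single user page for writing through the simplified wrapper, assuming the companion nv_mmap_read_unlock() helper that accompanies nv_mmap_read_lock() later in this header; names and error handling are illustrative.

static long nv_example_pin_user_page(unsigned long user_va, struct page **page)
{
    long ret;

    nv_mmap_read_lock(current->mm);
    ret = NV_GET_USER_PAGES(user_va & PAGE_MASK, 1, FOLL_WRITE, page);
    nv_mmap_read_unlock(current->mm);

    /* On success ret == 1; release the page later with put_page(*page). */
    return ret;
}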
/*
* pin_user_pages_remote()
@@ -146,22 +109,12 @@ typedef int vm_fault_t;
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
/*
* get_user_pages_remote() was added by commit 1e9877902dc7
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6.
*
* Note that get_user_pages_remote() requires the caller to hold a reference on
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
* the mm_struct.
* This will always be true when using current and current->mm. If the kernel passes
* the driver a vma via driver callback, the kernel holds a reference on vma->vm_mm
* over that callback.
*
* get_user_pages_remote() write/force parameters were replaced
* with gup_flags by commit 9beae1ea8930 ("mm: replace get_user_pages_remote()
* write/force parameters with gup_flags") in v4.9.
*
* get_user_pages_remote() added 'locked' parameter by commit 5b56d49fc31d
* ("mm: add locked parameter to get_user_pages_remote()") in v4.10.
*
* get_user_pages_remote() removed 'tsk' parameter by
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
* all gup code") in v5.9.
@@ -171,77 +124,16 @@ typedef int vm_fault_t;
*
*/
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(mm, start, nr_pages, flags, pages, NULL, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL)
#else
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE_VMAS
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
unsigned int flags,
struct page **pages,
int *locked)
{
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
pages, NULL);
}
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
#else
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS)
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
unsigned int flags,
struct page **pages,
int *locked)
{
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, NULL);
}
#else
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages(NULL, mm, start, nr_pages, flags, pages, NULL)
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
/*
* The .virtual_address field was effectively renamed to .address, by these
* two commits:
*
* struct vm_fault: .address was added by:
* 2016-12-14 82b0f8c39a3869b6fd2a10e180a862248736ec6f
*
* struct vm_fault: .virtual_address was removed by:
* 2016-12-14 1a29d85eb0f19b7d8271923d8917d7b4f5540b3e
*/
static inline unsigned long nv_page_fault_va(struct vm_fault *vmf)
{
#if defined(NV_VM_FAULT_HAS_ADDRESS)
return vmf->address;
#else
return (unsigned long)(vmf->virtual_address);
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#endif
}
static inline void nv_mmap_read_lock(struct mm_struct *mm)
{

View File

@@ -86,12 +86,6 @@ static inline int nv_pci_enable_msix(nv_linux_state_t *nvl, int nvec)
{
int rc = 0;
/*
* pci_enable_msix_range() replaced pci_enable_msix() in 3.14-rc1:
* 2014-01-03 302a2523c277bea0bbe8340312b09507905849ed
*/
#if defined(NV_PCI_ENABLE_MSIX_RANGE_PRESENT)
// We require all the vectors we are requesting so use the same min and max
rc = pci_enable_msix_range(nvl->pci_dev, nvl->msix_entries, nvec, nvec);
if (rc < 0)
@@ -99,13 +93,6 @@ static inline int nv_pci_enable_msix(nv_linux_state_t *nvl, int nvec)
return NV_ERR_OPERATING_SYSTEM;
}
WARN_ON(nvec != rc);
#else
rc = pci_enable_msix(nvl->pci_dev, nvl->msix_entries, nvec);
if (rc != 0)
{
return NV_ERR_OPERATING_SYSTEM;
}
#endif
nvl->num_intr = nvec;
return NV_OK;

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,6 +36,6 @@ int nv_pci_count_devices(void);
NvU8 nv_find_pci_capability(struct pci_dev *, NvU8);
int nvidia_dev_get_pci_info(const NvU8 *, struct pci_dev **, NvU64 *, NvU64 *);
nv_linux_state_t * find_pci(NvU32, NvU8, NvU8, NvU8);
NvBool nv_pci_is_valid_topology_for_direct_pci(nv_state_t *, struct device *);
NvBool nv_pci_is_valid_topology_for_direct_pci(nv_state_t *, struct pci_dev *);
NvBool nv_pci_has_common_pci_switch(nv_state_t *nv, struct pci_dev *);
#endif

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -63,16 +63,10 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
extern NvBool nvos_is_chipset_io_coherent(void);
/*
* Don't rely on the kernel's definition of pgprot_noncached(), as on 64-bit
* ARM that's not for system memory, but device memory instead. For I/O cache
* coherent systems, use cached mappings instead of uncached.
* ARM that's not for system memory, but device memory instead.
*/
#define NV_PGPROT_UNCACHED(old_prot) \
((nvos_is_chipset_io_coherent()) ? \
(old_prot) : \
__pgprot_modify((old_prot), PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)))
#elif defined(NVCPU_PPC64LE)
/* Don't attempt to mark sysmem pages as uncached on ppc64le */
#define NV_PGPROT_UNCACHED(old_prot) old_prot
__pgprot_modify((old_prot), PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC))
#else
#define NV_PGPROT_UNCACHED(old_prot) pgprot_noncached(old_prot)
#endif
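A minimal usage sketch (not from this commit): apply the macro to a VMA's protection bits before remapping, with the target pfn supplied by a hypothetical caller.

#include <linux/mm.h>

static int nv_example_map_uncached(struct vm_area_struct *vma, unsigned long pfn)
{
    /* Pick the platform-appropriate "uncached" attribute, then remap. */
    vma->vm_page_prot = NV_PGPROT_UNCACHED(vma->vm_page_prot);

    return remap_pfn_range(vma, vma->vm_start, pfn,
                           vma->vm_end - vma->vm_start, vma->vm_page_prot);
}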
@@ -94,32 +88,6 @@ extern NvBool nvos_is_chipset_io_coherent(void);
NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot(pgprot_val((old_prot)) & ~_PAGE_RW)
#elif defined(NVCPU_PPC64LE)
/*
* Some kernels use H_PAGE instead of _PAGE
*/
#if defined(_PAGE_RW)
#define NV_PAGE_RW _PAGE_RW
#elif defined(H_PAGE_RW)
#define NV_PAGE_RW H_PAGE_RW
#else
#warning "The kernel does not provide page protection defines!"
#endif
#if defined(_PAGE_4K_PFN)
#define NV_PAGE_4K_PFN _PAGE_4K_PFN
#elif defined(H_PAGE_4K_PFN)
#define NV_PAGE_4K_PFN H_PAGE_4K_PFN
#else
#undef NV_PAGE_4K_PFN
#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
pgprot_writecombine(old_prot)
/* Don't attempt to mark sysmem pages as write combined on ppc64le */
#define NV_PGPROT_WRITE_COMBINED(old_prot) old_prot
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot(pgprot_val((old_prot)) & ~NV_PAGE_RW)
#elif defined(NVCPU_RISCV64)
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
pgprot_writecombine(old_prot)

View File

@@ -29,8 +29,20 @@
irqreturn_t nvidia_isr (int, void *);
irqreturn_t nvidia_isr_kthread_bh (int, void *);
#define NV_SUPPORTS_PLATFORM_DEVICE 0
int nv_platform_register_driver(void);
void nv_platform_unregister_driver(void);
int nv_platform_count_devices(void);
int nv_soc_register_irqs(nv_state_t *nv);
void nv_soc_free_irqs(nv_state_t *nv);
#define NV_SUPPORTS_PLATFORM_DISPLAY_DEVICE 0
#define NV_SUPPORTS_PLATFORM_DEVICE NV_IS_EXPORT_SYMBOL_PRESENT___platform_driver_register
#if defined(NV_LINUX_PLATFORM_TEGRA_DCE_DCE_CLIENT_IPC_H_PRESENT)
#define NV_SUPPORTS_DCE_CLIENT_IPC 1
#else
#define NV_SUPPORTS_DCE_CLIENT_IPC 0
#endif
#define NV_SUPPORTS_PLATFORM_DISPLAY_DEVICE (NV_SUPPORTS_PLATFORM_DEVICE && NV_SUPPORTS_DCE_CLIENT_IPC)
#endif

View File

@@ -25,6 +25,7 @@
#define _NV_PROTO_H_
#include "nv-pci.h"
#include "nv-platform.h"
extern const char *nv_device_name;

View File

@@ -36,13 +36,6 @@
#define NV_MAX_ISR_DELAY_MS (NV_MAX_ISR_DELAY_US / 1000)
#define NV_NSECS_TO_JIFFIES(nsec) ((nsec) * HZ / 1000000000)
#if !defined(NV_TIMESPEC64_PRESENT)
struct timespec64 {
__s64 tv_sec;
long tv_nsec;
};
#endif
#if !defined(NV_KTIME_GET_RAW_TS64_PRESENT)
static inline void ktime_get_raw_ts64(struct timespec64 *ts64)
{
@@ -53,16 +46,6 @@ static inline void ktime_get_raw_ts64(struct timespec64 *ts64)
}
#endif
#if !defined(NV_KTIME_GET_REAL_TS64_PRESENT)
static inline void ktime_get_real_ts64(struct timespec64 *ts64)
{
struct timeval tv;
do_gettimeofday(&tv);
ts64->tv_sec = tv.tv_sec;
ts64->tv_nsec = tv.tv_usec * (NvU64) NSEC_PER_USEC;
}
#endif
static NvBool nv_timer_less_than
(
const struct timespec64 *a,
@@ -73,49 +56,6 @@ static NvBool nv_timer_less_than
: (a->tv_sec < b->tv_sec);
}
#if !defined(NV_TIMESPEC64_PRESENT)
static inline struct timespec64 timespec64_add
(
const struct timespec64 a,
const struct timespec64 b
)
{
struct timespec64 result;
result.tv_sec = a.tv_sec + b.tv_sec;
result.tv_nsec = a.tv_nsec + b.tv_nsec;
while (result.tv_nsec >= NSEC_PER_SEC)
{
++result.tv_sec;
result.tv_nsec -= NSEC_PER_SEC;
}
return result;
}
static inline struct timespec64 timespec64_sub
(
const struct timespec64 a,
const struct timespec64 b
)
{
struct timespec64 result;
result.tv_sec = a.tv_sec - b.tv_sec;
result.tv_nsec = a.tv_nsec - b.tv_nsec;
while (result.tv_nsec < 0)
{
--(result.tv_sec);
result.tv_nsec += NSEC_PER_SEC;
}
return result;
}
static inline s64 timespec64_to_ns(struct timespec64 *ts)
{
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
#endif
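A small illustrative sketch, not part of the commit, combining the kernel's timespec64 helpers with nv_timer_less_than() above to test whether a deadline has passed; the helper name is an assumption.

#include <linux/time64.h>

static inline NvBool nv_example_deadline_passed(const struct timespec64 *start,
                                                s64 timeout_ns)
{
    struct timespec64 now, deadline;

    ktime_get_raw_ts64(&now);
    deadline = timespec64_add(*start, ns_to_timespec64(timeout_ns));

    /* The deadline has passed once "now" is no longer less than it. */
    return !nv_timer_less_than(&now, &deadline);
}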
static inline NvU64 nv_ktime_get_raw_ns(void)
{
struct timespec64 ts;

View File

@@ -42,25 +42,12 @@ static inline void nv_timer_callback_typed_data(struct timer_list *timer)
nv_timer->nv_timer_callback(nv_timer);
}
static inline void nv_timer_callback_anon_data(unsigned long arg)
{
struct nv_timer *nv_timer = (struct nv_timer *)arg;
nv_timer->nv_timer_callback(nv_timer);
}
static inline void nv_timer_setup(struct nv_timer *nv_timer,
void (*callback)(struct nv_timer *nv_timer))
{
nv_timer->nv_timer_callback = callback;
#if defined(NV_TIMER_SETUP_PRESENT)
timer_setup(&nv_timer->kernel_timer, nv_timer_callback_typed_data, 0);
#else
init_timer(&nv_timer->kernel_timer);
nv_timer->kernel_timer.function = nv_timer_callback_anon_data;
nv_timer->kernel_timer.data = (unsigned long)nv_timer;
#endif
}
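A brief usage sketch (illustrative names, not from the commit): register a callback with nv_timer_setup() and arm the underlying timer_list one second out with mod_timer().

static void nv_example_timer_fired(struct nv_timer *nv_timer)
{
    /* hypothetical bottom-half work would run here */
}

static void nv_example_start_timer(struct nv_timer *nv_timer)
{
    nv_timer_setup(nv_timer, nv_example_timer_fired);
    mod_timer(&nv_timer->kernel_timer, jiffies + HZ);
}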
static inline void nv_timer_delete_sync(struct timer_list *timer)

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -50,6 +50,9 @@ extern nv_cap_t *nvidia_caps_root;
extern const NvBool nv_is_rm_firmware_supported_os;
#include <nvi2c.h>
#include <nvimpshared.h>
#include <nv-kernel-interface-api.h>
#define GPU_UUID_LEN (16)
@@ -83,6 +86,18 @@ extern const NvBool nv_is_rm_firmware_supported_os;
#define NV_RM_DEVICE_INTR_ADDRESS 0x100
/*
* Clock domain identifier used to fetch the engine load for the specified
* clock domain on Tegra platforms that conform to the Linux devfreq
* framework, in order to realize dynamic frequency scaling.
*/
typedef enum _TEGRASOC_DEVFREQ_CLK
{
TEGRASOC_DEVFREQ_CLK_GPC,
TEGRASOC_DEVFREQ_CLK_NVD,
} TEGRASOC_DEVFREQ_CLK;
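As a rough sketch of how a per-clock-domain load could feed the devfreq framework (not code from this commit; nv_example_read_load_pct() and nv_example_read_freq_hz() are hypothetical stand-ins for the RM/PMU query):

#include <linux/devfreq.h>

/* Hypothetical helpers standing in for the RM/PMU load and frequency query. */
static unsigned int nv_example_read_load_pct(struct device *dev);
static unsigned long nv_example_read_freq_hz(struct device *dev);

static int nv_example_get_dev_status(struct device *dev,
                                     struct devfreq_dev_status *status)
{
    unsigned int load_pct = nv_example_read_load_pct(dev);   /* 0..100 */

    /* Express the load as a busy/total ratio for the devfreq governor. */
    status->busy_time         = load_pct;
    status->total_time        = 100;
    status->current_frequency = nv_example_read_freq_hz(dev);
    return 0;
}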
/*!
* @brief The order of the display clocks in the enum defined below must be
* kept in sync with the mapping array and macro below.
@@ -105,6 +120,12 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_NVDISPLAY_DISP,
TEGRASOC_WHICH_CLK_NVDISPLAY_P0,
TEGRASOC_WHICH_CLK_NVDISPLAY_P1,
TEGRASOC_WHICH_CLK_NVDISPLAY_P2,
TEGRASOC_WHICH_CLK_NVDISPLAY_P3,
TEGRASOC_WHICH_CLK_NVDISPLAY_P4,
TEGRASOC_WHICH_CLK_NVDISPLAY_P5,
TEGRASOC_WHICH_CLK_NVDISPLAY_P6,
TEGRASOC_WHICH_CLK_NVDISPLAY_P7,
TEGRASOC_WHICH_CLK_DPAUX0,
TEGRASOC_WHICH_CLK_FUSE,
TEGRASOC_WHICH_CLK_DSIPLL_VCO,
@@ -123,9 +144,21 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_VPLL0_REF,
TEGRASOC_WHICH_CLK_VPLL0,
TEGRASOC_WHICH_CLK_VPLL1,
TEGRASOC_WHICH_CLK_VPLL2,
TEGRASOC_WHICH_CLK_VPLL3,
TEGRASOC_WHICH_CLK_VPLL4,
TEGRASOC_WHICH_CLK_VPLL5,
TEGRASOC_WHICH_CLK_VPLL6,
TEGRASOC_WHICH_CLK_VPLL7,
TEGRASOC_WHICH_CLK_NVDISPLAY_P0_REF,
TEGRASOC_WHICH_CLK_RG0,
TEGRASOC_WHICH_CLK_RG1,
TEGRASOC_WHICH_CLK_RG2,
TEGRASOC_WHICH_CLK_RG3,
TEGRASOC_WHICH_CLK_RG4,
TEGRASOC_WHICH_CLK_RG5,
TEGRASOC_WHICH_CLK_RG6,
TEGRASOC_WHICH_CLK_RG7,
TEGRASOC_WHICH_CLK_DISPPLL,
TEGRASOC_WHICH_CLK_DISPHUBPLL,
TEGRASOC_WHICH_CLK_DSI_LP,
@@ -133,9 +166,20 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_DSI_PIXEL,
TEGRASOC_WHICH_CLK_PRE_SOR0,
TEGRASOC_WHICH_CLK_PRE_SOR1,
TEGRASOC_WHICH_CLK_PRE_SOR2,
TEGRASOC_WHICH_CLK_PRE_SOR3,
TEGRASOC_WHICH_CLK_DP_LINKA_REF,
TEGRASOC_WHICH_CLK_DP_LINKB_REF,
TEGRASOC_WHICH_CLK_DP_LINKC_REF,
TEGRASOC_WHICH_CLK_DP_LINKD_REF,
TEGRASOC_WHICH_CLK_SOR_LINKA_INPUT,
TEGRASOC_WHICH_CLK_SOR_LINKB_INPUT,
TEGRASOC_WHICH_CLK_SOR_LINKC_INPUT,
TEGRASOC_WHICH_CLK_SOR_LINKD_INPUT,
TEGRASOC_WHICH_CLK_SOR_LINKA_AFIFO,
TEGRASOC_WHICH_CLK_SOR_LINKB_AFIFO,
TEGRASOC_WHICH_CLK_SOR_LINKC_AFIFO,
TEGRASOC_WHICH_CLK_SOR_LINKD_AFIFO,
TEGRASOC_WHICH_CLK_SOR_LINKA_AFIFO_M,
TEGRASOC_WHICH_CLK_RG0_M,
TEGRASOC_WHICH_CLK_RG1_M,
@@ -144,17 +188,36 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_PLLHUB,
TEGRASOC_WHICH_CLK_SOR0,
TEGRASOC_WHICH_CLK_SOR1,
TEGRASOC_WHICH_CLK_SOR2,
TEGRASOC_WHICH_CLK_SOR3,
TEGRASOC_WHICH_CLK_SOR_PADA_INPUT,
TEGRASOC_WHICH_CLK_SOR_PADB_INPUT,
TEGRASOC_WHICH_CLK_SOR_PADC_INPUT,
TEGRASOC_WHICH_CLK_SOR_PADD_INPUT,
TEGRASOC_WHICH_CLK_SOR0_PAD,
TEGRASOC_WHICH_CLK_SOR1_PAD,
TEGRASOC_WHICH_CLK_SOR2_PAD,
TEGRASOC_WHICH_CLK_SOR3_PAD,
TEGRASOC_WHICH_CLK_PRE_SF0,
TEGRASOC_WHICH_CLK_SF0,
TEGRASOC_WHICH_CLK_SF1,
TEGRASOC_WHICH_CLK_SF2,
TEGRASOC_WHICH_CLK_SF3,
TEGRASOC_WHICH_CLK_SF4,
TEGRASOC_WHICH_CLK_SF5,
TEGRASOC_WHICH_CLK_SF6,
TEGRASOC_WHICH_CLK_SF7,
TEGRASOC_WHICH_CLK_DSI_PAD_INPUT,
TEGRASOC_WHICH_CLK_PRE_SOR0_REF,
TEGRASOC_WHICH_CLK_PRE_SOR1_REF,
TEGRASOC_WHICH_CLK_SOR0_PLL_REF,
TEGRASOC_WHICH_CLK_SOR1_PLL_REF,
TEGRASOC_WHICH_CLK_SOR2_PLL_REF,
TEGRASOC_WHICH_CLK_SOR3_PLL_REF,
TEGRASOC_WHICH_CLK_SOR0_REF,
TEGRASOC_WHICH_CLK_SOR1_REF,
TEGRASOC_WHICH_CLK_SOR2_REF,
TEGRASOC_WHICH_CLK_SOR3_REF,
TEGRASOC_WHICH_CLK_OSC,
TEGRASOC_WHICH_CLK_DSC,
TEGRASOC_WHICH_CLK_MAUD,
@@ -168,6 +231,18 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_PLLA_DISP,
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
TEGRASOC_WHICH_CLK_PLLA,
TEGRASOC_WHICH_CLK_VPLLX_SOR0_MUXED,
TEGRASOC_WHICH_CLK_VPLLX_SOR1_MUXED,
TEGRASOC_WHICH_CLK_VPLLX_SOR2_MUXED,
TEGRASOC_WHICH_CLK_VPLLX_SOR3_MUXED,
TEGRASOC_WHICH_CLK_SF0_SOR,
TEGRASOC_WHICH_CLK_SF1_SOR,
TEGRASOC_WHICH_CLK_SF2_SOR,
TEGRASOC_WHICH_CLK_SF3_SOR,
TEGRASOC_WHICH_CLK_SF4_SOR,
TEGRASOC_WHICH_CLK_SF5_SOR,
TEGRASOC_WHICH_CLK_SF6_SOR,
TEGRASOC_WHICH_CLK_SF7_SOR,
TEGRASOC_WHICH_CLK_EMC,
TEGRASOC_WHICH_CLK_GPU_FIRST,
TEGRASOC_WHICH_CLK_GPU_SYS = TEGRASOC_WHICH_CLK_GPU_FIRST,
@@ -339,12 +414,8 @@ typedef struct nv_soc_irq_info_s {
#define NV_MAX_SOC_IRQS 10
#define NV_MAX_DPAUX_NUM_DEVICES 4
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 4
#define NV_IGPU_LEGACY_STALL_IRQ 70
#define NV_IGPU_MAX_STALL_IRQS 3
#define NV_IGPU_MAX_NONSTALL_IRQS 1
/*
* per device state
*/
@@ -390,7 +461,6 @@ typedef struct nv_state_t
nv_aperture_t *mipical_regs;
nv_aperture_t *fb, ud;
nv_aperture_t *simregs;
nv_aperture_t *emc_regs;
NvU32 num_dpaux_instance;
NvU32 interrupt_line;
@@ -404,12 +474,14 @@ typedef struct nv_state_t
NvU32 soc_dcb_size;
NvU32 disp_sw_soc_chip_id;
NvBool soc_is_dpalt_mode_supported;
NvBool soc_is_hfrp_supported;
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
NvU32 igpu_nonstall_irq;
NvU32 num_stall_irqs;
NvU64 dma_mask;
NvBool is_tegra_pci_igpu;
NvBool supports_tegra_igpu_rg;
NvBool is_tegra_pci_igpu_rg_enabled;
NvBool primary_vga;
NvU32 sim_env;
@@ -488,6 +560,13 @@ typedef struct nv_state_t
/* Bool to check if the GPU has a coherent sysmem link */
NvBool coherent;
/*
* Bool to check if GPU memory is backed by struct page.
* False for non-coherent platforms. May also be false
* on coherent platforms if GPU memory is not onlined to the kernel.
*/
NvBool mem_has_struct_page;
/* OS detected GPU has ATS capability */
NvBool ats_support;
/*
@@ -508,6 +587,9 @@ typedef struct nv_state_t
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
/* Bool to check if power management is supported */
NvBool is_pm_supported;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@@ -577,13 +659,12 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
#define NV_FLAG_OPEN 0x0001
#define NV_FLAG_EXCLUDE 0x0002
#define NV_FLAG_CONTROL 0x0004
// Unused 0x0008
#define NV_FLAG_PCI_P2P_UNSUPPORTED_CHIPSET 0x0008
#define NV_FLAG_SOC_DISPLAY 0x0010
#define NV_FLAG_USES_MSI 0x0020
#define NV_FLAG_USES_MSIX 0x0040
#define NV_FLAG_PASSTHRU 0x0080
#define NV_FLAG_SUSPENDED 0x0100
#define NV_FLAG_SOC_IGPU 0x0200
/* To be set when an FLR needs to be triggered after device shut down. */
#define NV_FLAG_TRIGGER_FLR 0x0400
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
@@ -631,15 +712,20 @@ typedef struct
const char *db_support;
} nv_power_info_t;
typedef enum
{
NV_MEMORY_TYPE_SYSTEM, /* Memory mapped for ROM, SBIOS and physical RAM. */
NV_MEMORY_TYPE_REGISTERS,
NV_MEMORY_TYPE_FRAMEBUFFER,
NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
} nv_memory_type_t;
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
#define NV_IS_SOC_DISPLAY_DEVICE(nv) \
((nv)->flags & NV_FLAG_SOC_DISPLAY)
#define NV_IS_SOC_IGPU_DEVICE(nv) \
((nv)->flags & NV_FLAG_SOC_IGPU)
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
@@ -789,9 +875,9 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
NvU32 NV_API_CALL nv_get_dev_minor (nv_state_t *);
void* NV_API_CALL nv_alloc_kernel_mapping (nv_state_t *, void *, NvU64, NvU32, NvU64, void **);
NV_STATUS NV_API_CALL nv_free_kernel_mapping (nv_state_t *, void *, void *, void *);
void NV_API_CALL nv_free_kernel_mapping (nv_state_t *, void *, void *, void *);
NV_STATUS NV_API_CALL nv_alloc_user_mapping (nv_state_t *, void *, NvU64, NvU32, NvU64, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_user_mapping (nv_state_t *, void *, NvU64, void *);
void NV_API_CALL nv_free_user_mapping (nv_state_t *, void *, NvU64, void *);
NV_STATUS NV_API_CALL nv_add_mapping_context_to_file (nv_state_t *, nv_usermap_access_params_t*, NvU32, void *, NvU64, NvU32);
NvU64 NV_API_CALL nv_get_kern_phys_address (NvU64);
@@ -813,7 +899,8 @@ void NV_API_CALL nv_unregister_peer_io_mem(nv_state_t *, void *);
struct sg_table;
NV_STATUS NV_API_CALL nv_register_sgt (nv_state_t *, NvU64 *, NvU64, NvU32, void **, struct sg_table *, void *);
NV_STATUS NV_API_CALL nv_register_sgt (nv_state_t *, NvU64 *, NvU64, NvU32, void **,
struct sg_table *, void *, NvBool);
void NV_API_CALL nv_unregister_sgt (nv_state_t *, struct sg_table **, void **, void *);
NV_STATUS NV_API_CALL nv_register_phys_pages (nv_state_t *, NvU64 *, NvU64, NvU32, void **);
void NV_API_CALL nv_unregister_phys_pages (nv_state_t *, void *);
@@ -824,12 +911,14 @@ NV_STATUS NV_API_CALL nv_dma_map_alloc (nv_dma_device_t *, NvU64, NvU6
NV_STATUS NV_API_CALL nv_dma_unmap_alloc (nv_dma_device_t *, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_dma_map_peer (nv_dma_device_t *, nv_dma_device_t *, NvU8, NvU64, NvU64 *);
NV_STATUS NV_API_CALL nv_dma_map_non_pci_peer (nv_dma_device_t *, NvU64, NvU64 *);
void NV_API_CALL nv_dma_unmap_peer (nv_dma_device_t *, NvU64, NvU64);
NV_STATUS NV_API_CALL nv_dma_map_mmio (nv_dma_device_t *, NvU64, NvU64 *);
void NV_API_CALL nv_dma_unmap_mmio (nv_dma_device_t *, NvU64, NvU64);
void NV_API_CALL nv_dma_cache_invalidate (nv_dma_device_t *, void *);
NvBool NV_API_CALL nv_grdma_pci_topology_supported(nv_state_t *, nv_dma_device_t *);
NvS32 NV_API_CALL nv_start_rc_timer (nv_state_t *);
NvS32 NV_API_CALL nv_stop_rc_timer (nv_state_t *);
@@ -902,8 +991,8 @@ struct drm_gem_object;
NV_STATUS NV_API_CALL nv_dma_import_sgt (nv_dma_device_t *, struct sg_table *, struct drm_gem_object *);
void NV_API_CALL nv_dma_release_sgt(struct sg_table *, struct drm_gem_object *);
NV_STATUS NV_API_CALL nv_dma_import_dma_buf (nv_dma_device_t *, struct dma_buf *, NvU32 *, struct sg_table **, nv_dma_buf_t **);
NV_STATUS NV_API_CALL nv_dma_import_from_fd (nv_dma_device_t *, NvS32, NvU32 *, struct sg_table **, nv_dma_buf_t **);
NV_STATUS NV_API_CALL nv_dma_import_dma_buf (nv_dma_device_t *, struct dma_buf *, NvBool, NvU32 *, struct sg_table **, nv_dma_buf_t **);
NV_STATUS NV_API_CALL nv_dma_import_from_fd (nv_dma_device_t *, NvS32, NvBool, NvU32 *, struct sg_table **, nv_dma_buf_t **);
void NV_API_CALL nv_dma_release_dma_buf (nv_dma_buf_t *);
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
@@ -914,6 +1003,10 @@ void NV_API_CALL nv_schedule_uvm_resume_p2p (NvU8 *);
NvBool NV_API_CALL nv_platform_supports_s0ix (void);
NvBool NV_API_CALL nv_s2idle_pm_configured (void);
NvBool NV_API_CALL nv_pci_tegra_register_power_domain (nv_state_t *, NvBool);
NvBool NV_API_CALL nv_pci_tegra_pm_init (nv_state_t *);
void NV_API_CALL nv_pci_tegra_pm_deinit (nv_state_t *);
NvBool NV_API_CALL nv_is_chassis_notebook (void);
void NV_API_CALL nv_allow_runtime_suspend (nv_state_t *nv);
void NV_API_CALL nv_disallow_runtime_suspend (nv_state_t *nv);
@@ -922,9 +1015,58 @@ typedef void (*nvTegraDceClientIpcCallback)(NvU32, NvU32, NvU32, void *, void *)
NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *);
NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);
typedef struct TEGRA_IMP_IMPORT_DATA TEGRA_IMP_IMPORT_DATA;
typedef struct nv_i2c_msg_s nv_i2c_msg_t;
NV_STATUS NV_API_CALL nv_bpmp_send_mrq (nv_state_t *, NvU32, const void *, NvU32, void *, NvU32, NvS32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_i2c_transfer(nv_state_t *, NvU32, NvU8, nv_i2c_msg_t *, int);
void NV_API_CALL nv_i2c_unregister_clients(nv_state_t *);
NV_STATUS NV_API_CALL nv_i2c_bus_status(nv_state_t *, NvU32, NvS32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_imp_get_import_data (TEGRA_IMP_IMPORT_DATA *);
NV_STATUS NV_API_CALL nv_imp_enable_disable_rfl (nv_state_t *nv, NvBool bEnable);
NV_STATUS NV_API_CALL nv_imp_icc_set_bw (nv_state_t *nv, NvU32 avg_bw_kbps, NvU32 floor_bw_kbps);
NV_STATUS NV_API_CALL nv_get_num_dpaux_instances(nv_state_t *nv, NvU32 *num_instances);
NV_STATUS NV_API_CALL nv_get_tegra_brightness_level(nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL nv_set_tegra_brightness_level(nv_state_t *, NvU32);
NV_STATUS NV_API_CALL nv_soc_device_reset (nv_state_t *);
NV_STATUS NV_API_CALL nv_soc_pm_powergate (nv_state_t *);
NV_STATUS NV_API_CALL nv_soc_pm_unpowergate (nv_state_t *);
NV_STATUS NV_API_CALL nv_gpio_get_pin_state(nv_state_t *, NvU32, NvU32 *);
void NV_API_CALL nv_gpio_set_pin_state(nv_state_t *, NvU32, NvU32);
NV_STATUS NV_API_CALL nv_gpio_set_pin_direction(nv_state_t *, NvU32, NvU32);
NV_STATUS NV_API_CALL nv_gpio_get_pin_direction(nv_state_t *, NvU32, NvU32 *);
NV_STATUS NV_API_CALL nv_gpio_get_pin_number(nv_state_t *, NvU32, NvU32 *);
NvBool NV_API_CALL nv_gpio_get_pin_interrupt_status(nv_state_t *, NvU32, NvU32);
NV_STATUS NV_API_CALL nv_gpio_set_pin_interrupt(nv_state_t *, NvU32, NvU32);
NvU32 NV_API_CALL nv_tegra_get_rm_interface_type(NvU32);
NV_STATUS NV_API_CALL nv_tegra_dce_register_ipc_client(NvU32, void *, nvTegraDceClientIpcCallback, NvU32 *);
NV_STATUS NV_API_CALL nv_tegra_dce_client_ipc_send_recv(NvU32, void *, NvU32);
NV_STATUS NV_API_CALL nv_tegra_dce_unregister_ipc_client(NvU32);
NV_STATUS NV_API_CALL nv_dsi_parse_panel_props(nv_state_t *, void *);
NvBool NV_API_CALL nv_dsi_is_panel_connected(nv_state_t *);
NV_STATUS NV_API_CALL nv_dsi_panel_enable(nv_state_t *, void *);
NV_STATUS NV_API_CALL nv_dsi_panel_reset(nv_state_t *, void *);
void NV_API_CALL nv_dsi_panel_disable(nv_state_t *, void *);
void NV_API_CALL nv_dsi_panel_cleanup(nv_state_t *, void *);
NV_STATUS NV_API_CALL nv_soc_mipi_cal_reset(nv_state_t *);
NvU32 NV_API_CALL nv_soc_fuse_register_read (NvU32 addr);
NvBool NV_API_CALL nv_get_hdcp_enabled(nv_state_t *nv);
NV_STATUS NV_API_CALL nv_get_valid_window_head_mask(nv_state_t *nv, NvU64 *);
NV_STATUS NV_API_CALL nv_dp_uphy_pll_init(nv_state_t *, NvU32, NvU32);
NV_STATUS NV_API_CALL nv_dp_uphy_pll_deinit(nv_state_t *);
NV_STATUS NV_API_CALL nv_soc_i2c_hsp_semaphore_acquire(NvU32 ownerId, NvBool bAcquire, NvU64 timeout);
typedef void (*nv_soc_tsec_cb_func_t)(void*, void*);
NvU32 NV_API_CALL nv_soc_tsec_send_cmd(void* cmd, nv_soc_tsec_cb_func_t cb_func, void* cb_context);
NvU32 NV_API_CALL nv_soc_tsec_event_register(nv_soc_tsec_cb_func_t cb_func, void* cb_context, NvBool is_init_event);
NvU32 NV_API_CALL nv_soc_tsec_event_unregister(NvBool is_init_event);
void* NV_API_CALL nv_soc_tsec_alloc_mem_desc(NvU32 num_bytes, NvU32 *flcn_addr);
void NV_API_CALL nv_soc_tsec_free_mem_desc(void *mem_desc);
NvBool NV_API_CALL nv_is_clk_enabled (nv_state_t *, TEGRASOC_WHICH_CLK);
NV_STATUS NV_API_CALL nv_set_parent (nv_state_t *, TEGRASOC_WHICH_CLK, TEGRASOC_WHICH_CLK);
NV_STATUS NV_API_CALL nv_get_parent (nv_state_t *, TEGRASOC_WHICH_CLK, TEGRASOC_WHICH_CLK*);
NV_STATUS NV_API_CALL nv_clk_get_handles (nv_state_t *);
void NV_API_CALL nv_clk_clear_handles (nv_state_t *);
NV_STATUS NV_API_CALL nv_enable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
@@ -961,6 +1103,7 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_pmu_perfmon_get_load (nvidia_stack_t *, nv_state_t *, NvU32 *, TEGRASOC_DEVFREQ_CLK);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
@@ -1008,14 +1151,16 @@ void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, n
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *, NvBool *);
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, NvBool, void *, void *, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, NvBool, void *, void *, void **, NvBool *);
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle,
NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **,
NvBool *, NvU32 *, NvBool *, nv_memory_type_t *);
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *,
NvHandle, NvHandle, MemoryRange,
@@ -1026,7 +1171,7 @@ void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *,
nv_state_t *, NvHandle, NvHandle,
NvU8, NvHandle *, NvHandle *,
NvHandle *, void **, NvBool *);
NvHandle *, void **, NvBool *, NvBool *);
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
@@ -1046,6 +1191,7 @@ NV_STATUS NV_API_CALL rm_set_external_kernel_client_count(nvidia_stack_t *, nv_
NV_STATUS NV_API_CALL rm_schedule_gpu_wakeup(nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_disable_iomap_wc(void);
void NV_API_CALL rm_init_tegra_dynamic_power_management(nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_init_dynamic_power_management(nvidia_stack_t *, nv_state_t *, NvBool);
void NV_API_CALL rm_cleanup_dynamic_power_management(nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_enable_dynamic_power_management(nvidia_stack_t *, nv_state_t *);
@@ -1070,7 +1216,7 @@ NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_update_sysfs_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_update_sysfs_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU32 *);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);

View File

@@ -46,6 +46,7 @@ struct UvmOpsUvmEvents;
#include "nvgputypes.h"
#include "nvstatus.h"
#include "nv_uvm_types.h"
#include "nv_uvm_user_types.h"
// Define the type here as it's Linux specific, used only by the Linux specific

View File

@@ -22,7 +22,8 @@
*/
//
// This file provides common types for both UVM driver and RM's UVM interface.
// This file provides common types for both the UVM kernel driver and RM's UVM
// interface.
//
#ifndef _NV_UVM_TYPES_H_
@@ -32,21 +33,9 @@
#include "nvstatus.h"
#include "nvgputypes.h"
#include "nvCpuUuid.h"
#include "nv_uvm_user_types.h" // For UvmGpuCachingType, UvmGpuMappingType, etc
//
// Default Page Size if left "0" because in RM BIG page size is default & there
// are multiple BIG page sizes in RM. These defines are used as flags to "0"
// should be OK when user is not sure which pagesize allocation it wants
//
#define UVM_PAGE_SIZE_DEFAULT 0x0ULL
#define UVM_PAGE_SIZE_4K 0x1000ULL
#define UVM_PAGE_SIZE_64K 0x10000ULL
#define UVM_PAGE_SIZE_128K 0x20000ULL
#define UVM_PAGE_SIZE_2M 0x200000ULL
#define UVM_PAGE_SIZE_512M 0x20000000ULL
#define UVM_PAGE_SIZE_256G 0x4000000000ULL
//
// When modifying flags, make sure they are compatible with the mirrored
// PMA_* flags in phys_mem_allocator.h.
@@ -81,9 +70,6 @@
//
#define UVM_PMA_CALLED_FROM_PMA_EVICTION 16384
#define UVM_UUID_LEN 16
#define UVM_SW_OBJ_SUBCHANNEL 5
typedef unsigned long long UvmGpuPointer;
//
@@ -447,80 +433,22 @@ typedef struct UvmGpuAllocInfo_tag
// SEV or GPU CC modes are enabled. Ignored otherwise
} UvmGpuAllocInfo;
typedef enum
{
UVM_VIRT_MODE_NONE = 0, // Baremetal or passthrough virtualization
UVM_VIRT_MODE_LEGACY = 1, // Virtualization without SRIOV support
UVM_VIRT_MODE_SRIOV_HEAVY = 2, // Virtualization with SRIOV Heavy configured
UVM_VIRT_MODE_SRIOV_STANDARD = 3, // Virtualization with SRIOV Standard configured
UVM_VIRT_MODE_COUNT = 4,
} UVM_VIRT_MODE;
// !!! The following enums (with UvmRm prefix) are defined and documented in
// mm/uvm/interface/uvm_types.h and must be mirrored. Please refer to that file
// for more details.
// UVM GPU mapping types
typedef enum
{
UvmRmGpuMappingTypeDefault = 0,
UvmRmGpuMappingTypeReadWriteAtomic = 1,
UvmRmGpuMappingTypeReadWrite = 2,
UvmRmGpuMappingTypeReadOnly = 3,
UvmRmGpuMappingTypeCount = 4
} UvmRmGpuMappingType;
// UVM GPU caching types
typedef enum
{
UvmRmGpuCachingTypeDefault = 0,
UvmRmGpuCachingTypeForceUncached = 1,
UvmRmGpuCachingTypeForceCached = 2,
UvmRmGpuCachingTypeCount = 3
} UvmRmGpuCachingType;
// UVM GPU format types
typedef enum {
UvmRmGpuFormatTypeDefault = 0,
UvmRmGpuFormatTypeBlockLinear = 1,
UvmRmGpuFormatTypeCount = 2
} UvmRmGpuFormatType;
// UVM GPU Element bits types
typedef enum {
UvmRmGpuFormatElementBitsDefault = 0,
UvmRmGpuFormatElementBits8 = 1,
UvmRmGpuFormatElementBits16 = 2,
// Cuda does not support 24-bit width
UvmRmGpuFormatElementBits32 = 4,
UvmRmGpuFormatElementBits64 = 5,
UvmRmGpuFormatElementBits128 = 6,
UvmRmGpuFormatElementBitsCount = 7
} UvmRmGpuFormatElementBits;
// UVM GPU Compression types
typedef enum {
UvmRmGpuCompressionTypeDefault = 0,
UvmRmGpuCompressionTypeEnabledNoPlc = 1,
UvmRmGpuCompressionTypeCount = 2
} UvmRmGpuCompressionType;
typedef struct UvmGpuExternalMappingInfo_tag
{
// In: GPU caching ability.
UvmRmGpuCachingType cachingType;
UvmGpuCachingType cachingType;
// In: Virtual permissions.
UvmRmGpuMappingType mappingType;
UvmGpuMappingType mappingType;
// In: RM virtual mapping memory format
UvmRmGpuFormatType formatType;
UvmGpuFormatType formatType;
// In: RM virtual mapping element bits
UvmRmGpuFormatElementBits elementBits;
UvmGpuFormatElementBits elementBits;
// In: RM virtual compression type
UvmRmGpuCompressionType compressionType;
UvmGpuCompressionType compressionType;
// In: Size of the buffer to store PTEs (in bytes).
NvU64 pteBufferSize;
@@ -546,6 +474,9 @@ typedef struct UvmGpuExternalMappingInfo_tag
// Out: PTE size (in bytes)
NvU32 pteSize;
// Out: UVM needs to invalidate L2 at unmap
NvBool bNeedL2InvalidateAtUnmap;
} UvmGpuExternalMappingInfo;
typedef struct UvmGpuExternalPhysAddrInfo_tag
@@ -553,7 +484,7 @@ typedef struct UvmGpuExternalPhysAddrInfo_tag
// In: Virtual permissions. Returns
// NV_ERR_INVALID_ACCESS_TYPE if input is
// inaccurate
UvmRmGpuMappingType mappingType;
UvmGpuMappingType mappingType;
// In: Size of the buffer to store PhysAddrs (in bytes).
NvU64 physAddrBufferSize;
@@ -603,6 +534,11 @@ typedef struct UvmGpuP2PCapsParams_tag
// second, not taking into account the protocols overhead. The reported
// bandwidth for indirect peers is zero.
NvU32 totalLinkLineRateMBps;
// Out: IOMMU/DMA mappings of bar1 of the respective peer vidmem.
// Size is 0 if bar1 p2p is not supported.
NvU64 bar1DmaAddress[2];
NvU64 bar1DmaSize[2];
} UvmGpuP2PCapsParams;
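/*
 * Editorial usage sketch, not part of this change: per the comment above,
 * bar1DmaSize[] is 0 when BAR1 peer-to-peer is unsupported, so a non-zero size
 * for a peer index indicates that the matching bar1DmaAddress[] entry is valid.
 * The helper name is hypothetical; it assumes the UvmGpuP2PCapsParams
 * definition from the header diffed here is in scope.
 */
static NvBool example_bar1_p2p_supported(const UvmGpuP2PCapsParams *caps,
                                         unsigned int peerIndex)
{
    if (peerIndex >= 2)   /* bar1DmaAddress[] and bar1DmaSize[] have two entries */
        return NV_FALSE;

    return caps->bar1DmaSize[peerIndex] != 0;
}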
// Platform-wide information
@@ -746,6 +682,9 @@ typedef struct UvmGpuInfo_tag
// GPU supports ATS capability
NvBool atsSupport;
// GPU supports Non-PASID ATS capability
NvBool nonPasidAtsSupport;
} UvmGpuInfo;
typedef struct UvmGpuFbInfo_tag
@@ -759,6 +698,7 @@ typedef struct UvmGpuFbInfo_tag
NvBool bZeroFb; // Zero FB mode enabled.
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
NvBool bStaticBar1WriteCombined; // Write combined is enabled
NvU64 staticBar1StartOffset; // The start offset of the static mapping
NvU64 staticBar1Size; // The size of the static mapping
NvU32 heapStart; // The start offset of heap in KB, helpful for MIG
@@ -795,19 +735,6 @@ typedef struct UvmPmaAllocationOptions_tag
NvU32 resultFlags; // valid if the allocation function returns NV_OK
} UvmPmaAllocationOptions;
//
// Mirrored in PMA (PMA_STATS)
//
typedef struct UvmPmaStatistics_tag
{
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
} UvmPmaStatistics;
/*******************************************************************************
uvmEventSuspend
This function will be called by the GPU driver to signal to UVM that the
@@ -1100,14 +1027,6 @@ typedef struct UvmGpuAccessCntrInfo_tag
NvHandle accessCntrBufferHandle;
} UvmGpuAccessCntrInfo;
typedef enum
{
UVM_ACCESS_COUNTER_GRANULARITY_64K = 1,
UVM_ACCESS_COUNTER_GRANULARITY_2M = 2,
UVM_ACCESS_COUNTER_GRANULARITY_16M = 3,
UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
} UVM_ACCESS_COUNTER_GRANULARITY;
typedef struct UvmGpuAccessCntrConfig_tag
{
NvU32 granularity;

View File

@@ -0,0 +1,166 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
//
// This file provides common types for the UVM kernel driver, UVM user layer,
// and RM's UVM interface.
//
#ifndef _NV_UVM_USER_TYPES_H_
#define _NV_UVM_USER_TYPES_H_
#include "nvtypes.h"
//
// The default page size is left "0" because in RM the BIG page size is the default
// and there are multiple BIG page sizes in RM. These defines are used as flags, so
// "0" should be OK when the user is not sure which page size it wants for an allocation.
//
#define UVM_PAGE_SIZE_DEFAULT 0x0ULL
#define UVM_PAGE_SIZE_4K 0x1000ULL
#define UVM_PAGE_SIZE_64K 0x10000ULL
#define UVM_PAGE_SIZE_128K 0x20000ULL
#define UVM_PAGE_SIZE_2M 0x200000ULL
#define UVM_PAGE_SIZE_512M 0x20000000ULL
#define UVM_PAGE_SIZE_256G 0x4000000000ULL
typedef enum
{
UVM_VIRT_MODE_NONE = 0, // Baremetal or passthrough virtualization
UVM_VIRT_MODE_LEGACY = 1, // Virtualization without SRIOV support
UVM_VIRT_MODE_SRIOV_HEAVY = 2, // Virtualization with SRIOV Heavy configured
UVM_VIRT_MODE_SRIOV_STANDARD = 3, // Virtualization with SRIOV Standard configured
UVM_VIRT_MODE_COUNT = 4,
} UVM_VIRT_MODE;
//------------------------------------------------------------------------------
// UVM GPU mapping types
//
// These types indicate the kinds of accesses allowed from a given GPU at the
// specified virtual address range. There are 3 basic kinds of accesses: read,
// write and atomics. Each type indicates what kinds of accesses are allowed.
// Accesses of any disallowed kind are fatal. The "Default" type specifies that
// the UVM driver should decide on the types of accesses allowed.
//------------------------------------------------------------------------------
typedef enum
{
UvmGpuMappingTypeDefault = 0,
UvmGpuMappingTypeReadWriteAtomic = 1,
UvmGpuMappingTypeReadWrite = 2,
UvmGpuMappingTypeReadOnly = 3,
UvmGpuMappingTypeCount = 4
} UvmGpuMappingType;
//------------------------------------------------------------------------------
// UVM GPU caching types
//
// These types indicate the cacheability of the specified virtual address range
// from a given GPU. The "Default" type specifies that the UVM driver should
// set caching on or off as required to follow the UVM coherence model. The
// "ForceUncached" and "ForceCached" types will always turn caching off or on
// respectively. These two types override the cacheability specified by the UVM
// coherence model.
//------------------------------------------------------------------------------
typedef enum
{
UvmGpuCachingTypeDefault = 0,
UvmGpuCachingTypeForceUncached = 1,
UvmGpuCachingTypeForceCached = 2,
UvmGpuCachingTypeCount = 3
} UvmGpuCachingType;
//------------------------------------------------------------------------------
// UVM GPU format types
//
// These types indicate the memory format of the specified virtual address
// range for a given GPU. The "Default" type specifies that the UVM driver will
// detect the format based on the allocation and is mutually inclusive with
// UvmGpuFormatElementBitsDefault.
//------------------------------------------------------------------------------
typedef enum {
UvmGpuFormatTypeDefault = 0,
UvmGpuFormatTypeBlockLinear = 1,
UvmGpuFormatTypeCount = 2
} UvmGpuFormatType;
//------------------------------------------------------------------------------
// UVM GPU Element bits types
//
// These types indicate the element size of the specified virtual address range
// for a given GPU. The "Default" type specifies that the UVM driver will
// detect the element size based on the allocation and is mutually inclusive
// with UvmGpuFormatTypeDefault. The element size is specified in bits:
// UvmGpuFormatElementBits8 uses the 8-bit format.
//------------------------------------------------------------------------------
typedef enum {
UvmGpuFormatElementBitsDefault = 0,
UvmGpuFormatElementBits8 = 1,
UvmGpuFormatElementBits16 = 2,
// Cuda does not support 24-bit width
UvmGpuFormatElementBits32 = 4,
UvmGpuFormatElementBits64 = 5,
UvmGpuFormatElementBits128 = 6,
UvmGpuFormatElementBitsCount = 7
} UvmGpuFormatElementBits;
//------------------------------------------------------------------------------
// UVM GPU Compression types
//
// These types indicate the compression type of the specified virtual address
// range for a given GPU. The "Default" type specifies that the UVM driver will
// detect the compression attributes based on the allocation. Any type other
// than the default will override the compression behavior of the physical
// allocation. UvmGpuCompressionTypeEnabledNoPlc will disable PLC but enables
// generic compression. UvmGpuCompressionTypeEnabledNoPlc type is only supported
// on Turing plus GPUs. Since UvmGpuCompressionTypeEnabledNoPlc type enables
// generic compression, it can only be used when the compression attribute of
// the underlying physical allocation is enabled.
//------------------------------------------------------------------------------
typedef enum {
UvmGpuCompressionTypeDefault = 0,
UvmGpuCompressionTypeEnabledNoPlc = 1,
UvmGpuCompressionTypeCount = 2
} UvmGpuCompressionType;
//
// Mirrored in PMA (PMA_STATS)
//
typedef struct UvmPmaStatistics_tag
{
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
} UvmPmaStatistics;
typedef enum
{
UVM_ACCESS_COUNTER_GRANULARITY_64K = 1,
UVM_ACCESS_COUNTER_GRANULARITY_2M = 2,
UVM_ACCESS_COUNTER_GRANULARITY_16M = 3,
UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
} UVM_ACCESS_COUNTER_GRANULARITY;
#endif // _NV_UVM_USER_TYPES_H_
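/*
 * Editorial usage sketch, not part of this change: the UvmGpu* enums that
 * nv_uvm_user_types.h now exports are consumed through structures such as
 * UvmGpuExternalMappingInfo (declared in nv_uvm_types.h, diffed above). The
 * helper below is hypothetical and only fills the fields visible in this diff;
 * the real structure has additional members.
 */
static void example_fill_external_mapping_info(UvmGpuExternalMappingInfo *info,
                                               NvU64 pteBufferSize)
{
    info->cachingType     = UvmGpuCachingTypeDefault;         /* follow the UVM coherence model */
    info->mappingType     = UvmGpuMappingTypeReadWriteAtomic; /* allow read, write and atomics */
    info->formatType      = UvmGpuFormatTypeDefault;          /* detect format from the allocation */
    info->elementBits     = UvmGpuFormatElementBitsDefault;   /* detect element size as well */
    info->compressionType = UvmGpuCompressionTypeDefault;     /* keep the allocation's compression */
    info->pteBufferSize   = pteBufferSize;                    /* size of the caller's PTE buffer, in bytes */
}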

View File

@@ -562,9 +562,6 @@ struct NvKmsKapiCreateSurfaceParams {
* explicit_layout is NV_TRUE and layout is
* NvKmsSurfaceMemoryLayoutBlockLinear */
NvU8 log2GobsPerBlockY;
/* [IN] Whether a surface can be updated directly on the screen */
NvBool noDisplayCaching;
};
enum NvKmsKapiAllocationType {
@@ -573,6 +570,28 @@ enum NvKmsKapiAllocationType {
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN = 2,
};
struct NvKmsKapiAllocateMemoryParams {
/* [IN] BlockLinear or Pitch */
enum NvKmsSurfaceMemoryLayout layout;
/* [IN] Allocation type */
enum NvKmsKapiAllocationType type;
/* [IN] Size, in bytes, of the memory to allocate */
NvU64 size;
/* [IN] Whether memory can be updated directly on the screen */
NvBool noDisplayCaching;
/* [IN] Whether to allocate memory from video memory or system memory */
NvBool useVideoMemory;
/* [IN/OUT] For input, non-zero if compression backing store should be
* allocated for the memory, for output, non-zero if compression backing
* store was allocated for the memory */
NvU8 *compressible;
};
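/*
 * Editorial sketch, not part of this change: the new parameter structure above
 * consolidates the arguments of the former allocateVideoMemory() and
 * allocateSystemMemory() entry points into one allocateMemory() call; the
 * nvidia-drm change to init_drm_nvkms_surface() later in this diff does
 * exactly this. The helper and its argument values are placeholders.
 */
static struct NvKmsKapiMemory *example_alloc_scanout_memory(
    const struct NvKmsKapiFunctionsTable *nvKms,
    struct NvKmsKapiDevice *device,
    NvU64 size,
    NvBool hasVideoMemory)
{
    NvU8 compressible = 0;   /* in: no compression requested; out: whether it was granted */
    struct NvKmsKapiAllocateMemoryParams params = {
        .layout           = NvKmsSurfaceMemoryLayoutPitch,
        .type             = NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
        .size             = size,
        .noDisplayCaching = NV_FALSE,
        .useVideoMemory   = hasVideoMemory,
        .compressible     = &compressible,
    };

    return nvKms->allocateMemory(device, &params);
}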
typedef enum NvKmsKapiRegisterWaiterResultRec {
NVKMS_KAPI_REG_WAITER_FAILED,
NVKMS_KAPI_REG_WAITER_SUCCESS,
@@ -602,14 +621,19 @@ struct NvKmsKapiFunctionsTable {
} systemInfo;
/*!
* Enumerate the available physical GPUs that can be used with NVKMS.
* Enumerate the available GPUs that can be used with NVKMS.
*
* \param [out] gpuInfo The information of the enumerated GPUs.
* It is an array of NVIDIA_MAX_GPUS elements.
* The gpuCallback will be called with a NvKmsKapiGpuInfo for each
* physical and MIG GPU currently available in the system.
*
* \param [in] gpuCallback Client function to handle each GPU.
*
* \return Count of enumerated gpus.
*/
NvU32 (*enumerateGpus)(struct NvKmsKapiGpuInfo *kapiGpuInfo);
NvU32 (*enumerateGpus)
(
void (*gpuCallback)(const struct NvKmsKapiGpuInfo *info)
);
/*!
* Allocate an NVK device using which you can query/allocate resources on
@@ -839,66 +863,22 @@ struct NvKmsKapiFunctionsTable {
);
/*!
* Allocate some unformatted video memory of the specified size.
* Allocate some unformatted video or system memory of the specified size.
*
* This function allocates video memory on the specified GPU.
* It should be suitable for mapping on the CPU as a pitch
* linear or block-linear surface.
* This function allocates video or system memory on the specified GPU. It
* should be suitable for mapping on the CPU as a pitch linear or
* block-linear surface.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] layout BlockLinear or Pitch.
*
* \param [in] type Allocation type.
*
* \param [in] size Size, in bytes, of the memory to allocate.
*
* \param [in/out] compressible For input, non-zero if compression
* backing store should be allocated for
* the memory, for output, non-zero if
* compression backing store was
* allocated for the memory.
* \param [in/out] params Parameters required for memory allocation.
*
* \return A valid memory handle on success, NULL on failure.
*/
struct NvKmsKapiMemory* (*allocateVideoMemory)
struct NvKmsKapiMemory* (*allocateMemory)
(
struct NvKmsKapiDevice *device,
enum NvKmsSurfaceMemoryLayout layout,
enum NvKmsKapiAllocationType type,
NvU64 size,
NvU8 *compressible
);
/*!
* Allocate some unformatted system memory of the specified size.
*
* This function allocates system memory . It should be suitable
* for mapping on the CPU as a pitch linear or block-linear surface.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] layout BlockLinear or Pitch.
*
* \param [in] type Allocation type.
*
* \param [in] size Size, in bytes, of the memory to allocate.
*
* \param [in/out] compressible For input, non-zero if compression
* backing store should be allocated for
* the memory, for output, non-zero if
* compression backing store was
* allocated for the memory.
*
* \return An valid memory handle on success, NULL on failure.
*/
struct NvKmsKapiMemory* (*allocateSystemMemory)
(
struct NvKmsKapiDevice *device,
enum NvKmsSurfaceMemoryLayout layout,
enum NvKmsKapiAllocationType type,
NvU64 size,
NvU8 *compressible
struct NvKmsKapiAllocateMemoryParams *params
);
/*!
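/*
 * Editorial sketch, not part of this change: the enumerateGpus() entry point
 * above no longer fills a caller-provided NVIDIA_MAX_GPUS array; it now takes
 * a callback that is invoked once per physical or MIG GPU and returns the
 * count. The callback and counter below are hypothetical.
 */
static unsigned int example_gpu_count;

static void example_gpu_callback(const struct NvKmsKapiGpuInfo *info)
{
    (void)info;            /* a real client would record the GPU info here */
    example_gpu_count++;
}

static NvU32 example_enumerate_gpus(const struct NvKmsKapiFunctionsTable *nvKms)
{
    example_gpu_count = 0;
    return nvKms->enumerateGpus(example_gpu_callback);
}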

View File

@@ -580,7 +580,6 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
n64 = BIT_IDX_64(LOWESTBIT(n64));\
}
// Destructive operation on n32
#define HIGHESTBITIDX_32(n32) \
{ \
@@ -592,6 +591,17 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
n32 = count; \
}
// Destructive operation on n64
#define HIGHESTBITIDX_64(n64) \
{ \
NvU64 count = 0; \
while (n64 >>= 1) \
{ \
count++; \
} \
n64 = count; \
}
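/*
 * Editorial sketch, not part of this change: like the existing 32-bit variant,
 * the new HIGHESTBITIDX_64() macro is destructive: it overwrites its argument
 * with the index of the highest set bit, so callers should pass a scratch
 * copy. Note that inputs 0 and 1 both yield index 0.
 */
static NvU32 example_highest_bit_idx_64(NvU64 value)
{
    NvU64 scratch = value;      /* keep the caller's value intact */
    HIGHESTBITIDX_64(scratch);  /* scratch now holds the highest set bit index */
    return (NvU32)scratch;
}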
// Destructive operation on n32
#define ROUNDUP_POW2(n32) \
{ \

View File

@@ -164,6 +164,7 @@ NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR, 0x00000086, "An error occ
NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC, 0x00000087, "NVLink fabric state cached by the driver is out of sync")
NV_STATUS_CODE(NV_ERR_BUFFER_FULL, 0x00000088, "Buffer is full")
NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY, 0x00000089, "Buffer is empty")
NV_STATUS_CODE(NV_ERR_MC_FLA_OFFSET_TABLE_FULL, 0x0000008A, "Multicast FLA offset table has no available slots")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,10 +24,6 @@
#ifndef NVTYPES_INCLUDED
#define NVTYPES_INCLUDED
#ifdef __cplusplus
extern "C" {
#endif
#include "cpuopsys.h"
#ifndef NVTYPES_USE_STDINT
@@ -55,6 +51,10 @@ extern "C" {
#endif
#endif // __cplusplus
#ifdef __cplusplus
extern "C" {
#endif
#if defined(MAKE_NV64TYPES_8BYTES_ALIGNED) && defined(__i386__)
// ensure or force 8-bytes alignment of NV 64-bit types
#define OPTIONAL_ALIGN8_ATTR __attribute__((aligned(8)))

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -72,10 +72,10 @@ typedef struct os_wait_queue os_wait_queue;
NV_STATUS NV_API_CALL os_alloc_mem (void **, NvU64);
void NV_API_CALL os_free_mem (void *);
NV_STATUS NV_API_CALL os_get_current_time (NvU32 *, NvU32 *);
NvU64 NV_API_CALL os_get_current_tick (void);
NvU64 NV_API_CALL os_get_current_tick_hr (void);
NvU64 NV_API_CALL os_get_tick_resolution (void);
NV_STATUS NV_API_CALL os_get_system_time (NvU32 *, NvU32 *);
NvU64 NV_API_CALL os_get_monotonic_time_ns (void);
NvU64 NV_API_CALL os_get_monotonic_time_ns_hr (void);
NvU64 NV_API_CALL os_get_monotonic_tick_resolution_ns (void);
NV_STATUS NV_API_CALL os_delay (NvU32);
NV_STATUS NV_API_CALL os_delay_us (NvU32);
NvU64 NV_API_CALL os_get_cpu_frequency (void);
@@ -208,6 +208,7 @@ void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
NV_STATUS NV_API_CALL os_get_tegra_platform (NvU32 *);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
OS_INTF_PCIE_REQ_ATOMICS_64BIT,
@@ -223,8 +224,11 @@ void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
NvBool NV_API_CALL os_is_init_ns(void);
NV_STATUS NV_API_CALL os_iommu_sva_bind(void *arg, void **handle, NvU32 *pasid);
void NV_API_CALL os_iommu_sva_unbind(void *handle);
extern NvU32 os_page_size;
extern NvU64 os_page_size;
extern NvU64 os_max_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
@@ -266,4 +270,9 @@ int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...);
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_NO 0x00000000
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_YES 0x00000001
// NV OS Tegra platform type defines
#define NV_OS_TEGRA_PLATFORM_SIM 0
#define NV_OS_TEGRA_PLATFORM_FPGA 1
#define NV_OS_TEGRA_PLATFORM_SILICON 2
#endif /* OS_INTERFACE_H */
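/*
 * Editorial sketch, not part of this change: the os_get_current_tick()/
 * os_get_tick_resolution() pair has been replaced above by helpers that report
 * monotonic time directly in nanoseconds, so an elapsed interval is a simple
 * subtraction. os_delay() is assumed to take milliseconds, as suggested by the
 * companion os_delay_us(); the 5 ms value is arbitrary.
 */
static NvU64 example_measure_elapsed_ns(void)
{
    NvU64 start = os_get_monotonic_time_ns();

    (void)os_delay(5);                          /* wait roughly 5 ms */

    return os_get_monotonic_time_ns() - start;  /* elapsed time in nanoseconds */
}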

View File

@@ -27,6 +27,8 @@ typedef enum
{
NV_OS_GPIO_FUNC_HOTPLUG_A,
NV_OS_GPIO_FUNC_HOTPLUG_B,
NV_OS_GPIO_FUNC_HOTPLUG_C,
NV_OS_GPIO_FUNC_HOTPLUG_D,
} NV_OS_GPIO_FUNC_NAMES;
#endif

File diff suppressed because it is too large

View File

@@ -4,106 +4,39 @@ NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drmP.h \
drm/drm_aperture.h \
drm/drm_auth.h \
drm/drm_gem.h \
drm/drm_crtc.h \
drm/drm_color_mgmt.h \
drm/drm_atomic.h \
drm/drm_atomic_helper.h \
drm/drm_atomic_state_helper.h \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_edid.h \
drm/drm_fbdev_generic.h \
drm/drm_fbdev_ttm.h \
drm/drm_client_setup.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
drm/drm_blend.h \
drm/drm_fourcc.h \
drm/drm_prime.h \
drm/drm_plane.h \
drm/drm_vblank.h \
drm/drm_file.h \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
drm/drm_property.h \
drm/clients/drm_client_setup.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/pfn_t.h \
linux/aperture.h \
linux/dma-direct.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
linux/printk.h \
linux/ratelimit.h \
linux/prio_tree.h \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched.h \
linux/sched/mm.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
xen/ioemu.h \
linux/fence.h \
linux/dma-fence.h \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/fuse-helper.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
linux/host1x-next.h \
asm/set_memory.h \
asm/prom.h \
linux/atomic.h \
asm/barrier.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
sound/hda_codec.h \
linux/dma-buf.h \
linux/time.h \
linux/platform_device.h \
linux/mutex.h \
linux/reset.h \
linux/of_platform.h \
linux/of_device.h \
linux/of_gpio.h \
linux/gpio.h \
linux/gpio/consumer.h \
linux/interconnect.h \
linux/pm_runtime.h \
linux/clk.h \
linux/clk-provider.h \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h \
linux/mpi.h \
asm/mshyperv.h \
crypto/sig.h \
linux/pfn_t.h
crypto/sig.h

View File

@@ -29,12 +29,7 @@
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/mm.h>
#if defined(NV_LINUX_BUG_H_PRESENT)
#include <linux/bug.h>
#else
#include <asm/bug.h>
#endif
// Today's implementation is a little simpler and more limited than the
// API description allows for in nv-kthread-q.h. Details include:

View File

@@ -1,156 +0,0 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NVIDIA_DMA_FENCE_HELPER_H__
#define __NVIDIA_DMA_FENCE_HELPER_H__
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_FENCE_AVAILABLE)
/*
* Fence headers are moved to file dma-fence.h and struct fence has
* been renamed to dma_fence by commit -
*
* 2016-10-25 : f54d1867005c3323f5d8ad83eed823e84226c429
*/
#if defined(NV_LINUX_FENCE_H_PRESENT)
#include <linux/fence.h>
#else
#include <linux/dma-fence.h>
#endif
#if defined(NV_LINUX_FENCE_H_PRESENT)
typedef struct fence nv_dma_fence_t;
typedef struct fence_ops nv_dma_fence_ops_t;
typedef struct fence_cb nv_dma_fence_cb_t;
typedef fence_func_t nv_dma_fence_func_t;
#else
typedef struct dma_fence nv_dma_fence_t;
typedef struct dma_fence_ops nv_dma_fence_ops_t;
typedef struct dma_fence_cb nv_dma_fence_cb_t;
typedef dma_fence_func_t nv_dma_fence_func_t;
#endif
#if defined(NV_LINUX_FENCE_H_PRESENT)
#define NV_DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT FENCE_FLAG_ENABLE_SIGNAL_BIT
#else
#define NV_DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT
#endif
static inline bool nv_dma_fence_is_signaled(nv_dma_fence_t *fence) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_is_signaled(fence);
#else
return dma_fence_is_signaled(fence);
#endif
}
static inline nv_dma_fence_t *nv_dma_fence_get(nv_dma_fence_t *fence)
{
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_get(fence);
#else
return dma_fence_get(fence);
#endif
}
static inline void nv_dma_fence_put(nv_dma_fence_t *fence) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
fence_put(fence);
#else
dma_fence_put(fence);
#endif
}
static inline signed long
nv_dma_fence_default_wait(nv_dma_fence_t *fence,
bool intr, signed long timeout) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_default_wait(fence, intr, timeout);
#else
return dma_fence_default_wait(fence, intr, timeout);
#endif
}
static inline int nv_dma_fence_signal(nv_dma_fence_t *fence) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_signal(fence);
#else
return dma_fence_signal(fence);
#endif
}
static inline int nv_dma_fence_signal_locked(nv_dma_fence_t *fence) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_signal_locked(fence);
#else
return dma_fence_signal_locked(fence);
#endif
}
static inline u64 nv_dma_fence_context_alloc(unsigned num) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_context_alloc(num);
#else
return dma_fence_context_alloc(num);
#endif
}
static inline void
nv_dma_fence_init(nv_dma_fence_t *fence,
const nv_dma_fence_ops_t *ops,
spinlock_t *lock, u64 context, uint64_t seqno) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
fence_init(fence, ops, lock, context, seqno);
#else
dma_fence_init(fence, ops, lock, context, seqno);
#endif
}
static inline void
nv_dma_fence_set_error(nv_dma_fence_t *fence,
int error) {
#if defined(NV_DMA_FENCE_SET_ERROR_PRESENT)
return dma_fence_set_error(fence, error);
#elif defined(NV_FENCE_SET_ERROR_PRESENT)
return fence_set_error(fence, error);
#else
fence->status = error;
#endif
}
static inline int
nv_dma_fence_add_callback(nv_dma_fence_t *fence,
nv_dma_fence_cb_t *cb,
nv_dma_fence_func_t func) {
#if defined(NV_LINUX_FENCE_H_PRESENT)
return fence_add_callback(fence, cb, func);
#else
return dma_fence_add_callback(fence, cb, func);
#endif
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
#endif /* __NVIDIA_DMA_FENCE_HELPER_H__ */

View File

@@ -25,8 +25,6 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_FENCE_AVAILABLE)
/*
* linux/reservation.h is renamed to linux/dma-resv.h, by commit
* 52791eeec1d9 (dma-buf: rename reservation_object to dma_resv)
@@ -39,7 +37,7 @@
#include <linux/reservation.h>
#endif
#include <nvidia-dma-fence-helper.h>
#include <linux/dma-fence.h>
#if defined(NV_LINUX_DMA_RESV_H_PRESENT)
typedef struct dma_resv nv_dma_resv_t;
@@ -108,7 +106,7 @@ static inline int nv_dma_resv_reserve_fences(nv_dma_resv_t *obj,
}
static inline void nv_dma_resv_add_excl_fence(nv_dma_resv_t *obj,
nv_dma_fence_t *fence)
struct dma_fence *fence)
{
#if defined(NV_LINUX_DMA_RESV_H_PRESENT)
#if defined(NV_DMA_RESV_ADD_FENCE_PRESENT)
@@ -122,7 +120,7 @@ static inline void nv_dma_resv_add_excl_fence(nv_dma_resv_t *obj,
}
static inline void nv_dma_resv_add_shared_fence(nv_dma_resv_t *obj,
nv_dma_fence_t *fence)
struct dma_fence *fence)
{
#if defined(NV_LINUX_DMA_RESV_H_PRESENT)
#if defined(NV_DMA_RESV_ADD_FENCE_PRESENT)
@@ -135,6 +133,4 @@ static inline void nv_dma_resv_add_shared_fence(nv_dma_resv_t *obj,
#endif
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
#endif /* __NVIDIA_DMA_RESV_HELPER_H__ */

View File

@@ -55,11 +55,10 @@
#endif
#if defined(NV_DRM_DRIVER_HAS_GEM_PRIME_RES_OBJ) || \
defined(NV_DRM_GEM_OBJECT_HAS_RESV)
#define NV_DRM_FENCE_AVAILABLE
#else
#undef NV_DRM_FENCE_AVAILABLE
#include <linux/kconfig.h> // for IS_ENABLED()
#if IS_ENABLED(CONFIG_DRM) || defined(__FreeBSD__)
#define NV_DRM_AVAILABLE
#endif
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && \
@@ -87,17 +86,6 @@
#endif
#endif
/*
* We can support color management if either drm_helper_crtc_enable_color_mgmt()
* or drm_crtc_enable_color_mgmt() exist.
*/
#if defined(NV_DRM_HELPER_CRTC_ENABLE_COLOR_MGMT_PRESENT) || \
defined(NV_DRM_CRTC_ENABLE_COLOR_MGMT_PRESENT)
#define NV_DRM_COLOR_MGMT_AVAILABLE
#else
#undef NV_DRM_COLOR_MGMT_AVAILABLE
#endif
/*
* Adapt to quirks in FreeBSD's Linux kernel compatibility layer.
*/

View File

@@ -20,9 +20,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-helper.h"
#include "nvidia-drm-priv.h"
@@ -228,9 +228,6 @@ nv_drm_connector_detect(struct drm_connector *connector, bool force)
}
static struct drm_connector_funcs nv_connector_funcs = {
#if defined NV_DRM_ATOMIC_HELPER_CONNECTOR_DPMS_PRESENT
.dpms = drm_atomic_helper_connector_dpms,
#endif
.destroy = nv_drm_connector_destroy,
.reset = drm_atomic_helper_connector_reset,
.force = __nv_drm_connector_force,
@@ -588,16 +585,11 @@ nv_drm_get_connector(struct drm_device *dev,
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH])
{
struct drm_connector *connector = NULL;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#else
struct drm_mode_config *config = &dev->mode_config;
mutex_lock(&config->mutex);
#endif
drm_connector_list_iter_begin(dev, &conn_iter);
/* Lookup for existing connector with same physical index */
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_for_each_connector_iter(connector, &conn_iter) {
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
if (nv_connector->physicalIndex == physicalIndex) {
@@ -612,11 +604,7 @@ nv_drm_get_connector(struct drm_device *dev,
connector = NULL;
done:
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#else
mutex_unlock(&config->mutex);
#endif
drm_connector_list_iter_end(&conn_iter);
if (!connector) {
connector = nv_drm_connector_new(dev,

View File

@@ -25,15 +25,13 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
#include "nvtypes.h"
#include "nvkms-api-types.h"
@@ -101,6 +99,6 @@ nv_drm_get_connector(struct drm_device *dev,
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
struct nv_drm_connector *nv_connector);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_CONNECTOR_H__ */

View File

@@ -20,9 +20,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-helper.h"
#include "nvidia-drm-priv.h"
@@ -42,10 +42,7 @@
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
#include <drm/drm_color_mgmt.h>
#endif
/*
* The two arrays below specify the PQ EOTF transfer function that's used to
@@ -150,15 +147,15 @@ nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
if ((expected_size > 0) &&
(new_blob->length != expected_size)) {
nv_drm_property_blob_put(new_blob);
drm_property_blob_put(new_blob);
return -EINVAL;
}
}
if (old_blob != new_blob) {
nv_drm_property_blob_put(old_blob);
drm_property_blob_put(old_blob);
if (new_blob) {
nv_drm_property_blob_get(new_blob);
drm_property_blob_get(new_blob);
}
*blob = new_blob;
*replaced = true;
@@ -166,7 +163,7 @@ nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
*replaced = false;
}
nv_drm_property_blob_put(new_blob);
drm_property_blob_put(new_blob);
return 0;
}
@@ -204,6 +201,15 @@ plane_req_config_disable(struct NvKmsKapiLayerRequestedConfig *req_config)
req_config->flags.srcWHChanged = NV_TRUE;
req_config->flags.dstXYChanged = NV_TRUE;
req_config->flags.dstWHChanged = NV_TRUE;
req_config->flags.cscChanged = NV_TRUE;
req_config->flags.inputTfChanged = NV_TRUE;
req_config->flags.outputTfChanged = NV_TRUE;
req_config->flags.inputColorSpaceChanged = NV_TRUE;
req_config->flags.inputColorRangeChanged = NV_TRUE;
req_config->flags.hdrMetadataChanged = NV_TRUE;
req_config->flags.matrixOverridesChanged = NV_TRUE;
req_config->flags.ilutChanged = NV_TRUE;
req_config->flags.tmoChanged = NV_TRUE;
}
static inline void
@@ -244,7 +250,6 @@ static NvU64 ctm_val_to_csc_val(NvU64 ctm_val)
return csc_val;
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
static void ctm_to_csc(struct NvKmsCscMatrix *nvkms_csc,
struct drm_color_ctm *drm_ctm)
{
@@ -261,7 +266,6 @@ static void ctm_to_csc(struct NvKmsCscMatrix *nvkms_csc,
}
}
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
static void ctm_3x4_to_csc(struct NvKmsCscMatrix *nvkms_csc,
struct drm_color_ctm_3x4 *drm_ctm_3x4)
@@ -394,6 +398,14 @@ static int init_drm_nvkms_surface(struct nv_drm_device *nv_dev,
struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
NvU8 compressible = 0; // No compression
struct NvKmsKapiAllocateMemoryParams allocParams = {
.layout = NvKmsSurfaceMemoryLayoutPitch,
.type = NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
.size = surface_params->surface_size,
.useVideoMemory = nv_dev->hasVideoMemory,
.compressible = &compressible,
};
struct NvKmsKapiCreateSurfaceParams params = {};
struct NvKmsKapiMemory *surface_mem;
struct NvKmsKapiSurface *surface;
@@ -404,21 +416,7 @@ static int init_drm_nvkms_surface(struct nv_drm_device *nv_dev,
params.height = surface_params->height;
/* Allocate displayable memory. */
if (nv_dev->hasVideoMemory) {
surface_mem =
nvKms->allocateVideoMemory(pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
surface_params->surface_size,
&compressible);
} else {
surface_mem =
nvKms->allocateSystemMemory(pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
surface_params->surface_size,
&compressible);
}
surface_mem = nvKms->allocateMemory(nv_dev->pDevice, &allocParams);
if (surface_mem == NULL) {
return -ENOMEM;
}
@@ -1187,7 +1185,6 @@ plane_req_config_update(struct drm_plane *plane,
req_config->config.csc = old_config.csc;
#if defined(NV_DRM_ROTATION_AVAILABLE)
/*
* plane_state->rotation is only valid when plane->rotation_property
* is non-NULL.
@@ -1225,7 +1222,6 @@ plane_req_config_update(struct drm_plane *plane,
break;
}
}
#endif
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
if (plane->blend_mode_property != NULL && plane->alpha_property != NULL) {
@@ -1630,7 +1626,6 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
return ret;
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
/*
* According to the comment in the Linux kernel's
@@ -1646,7 +1641,6 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
plane_requested_config->config.cscUseMain = NV_FALSE;
plane_requested_config->flags.cscChanged = NV_TRUE;
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
if (__is_async_flip_requested(plane, crtc_state)) {
/*
@@ -1666,7 +1660,6 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
return 0;
}
#if defined(NV_DRM_UNIVERSAL_PLANE_INIT_HAS_FORMAT_MODIFIERS_ARG)
static bool nv_drm_plane_format_mod_supported(struct drm_plane *plane,
uint32_t format,
uint64_t modifier)
@@ -1674,7 +1667,6 @@ static bool nv_drm_plane_format_mod_supported(struct drm_plane *plane,
/* All supported modifiers are compatible with all supported formats */
return true;
}
#endif
static int nv_drm_atomic_crtc_get_property(
struct drm_crtc *crtc,
@@ -1961,34 +1953,34 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
if (nv_plane_state->hdr_output_metadata) {
nv_drm_property_blob_get(nv_plane_state->hdr_output_metadata);
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
}
#endif
nv_plane_state->lms_ctm = nv_old_plane_state->lms_ctm;
if (nv_plane_state->lms_ctm) {
nv_drm_property_blob_get(nv_plane_state->lms_ctm);
drm_property_blob_get(nv_plane_state->lms_ctm);
}
nv_plane_state->lms_to_itp_ctm = nv_old_plane_state->lms_to_itp_ctm;
if (nv_plane_state->lms_to_itp_ctm) {
nv_drm_property_blob_get(nv_plane_state->lms_to_itp_ctm);
drm_property_blob_get(nv_plane_state->lms_to_itp_ctm);
}
nv_plane_state->itp_to_lms_ctm = nv_old_plane_state->itp_to_lms_ctm;
if (nv_plane_state->itp_to_lms_ctm) {
nv_drm_property_blob_get(nv_plane_state->itp_to_lms_ctm);
drm_property_blob_get(nv_plane_state->itp_to_lms_ctm);
}
nv_plane_state->blend_ctm = nv_old_plane_state->blend_ctm;
if (nv_plane_state->blend_ctm) {
nv_drm_property_blob_get(nv_plane_state->blend_ctm);
drm_property_blob_get(nv_plane_state->blend_ctm);
}
nv_plane_state->degamma_tf = nv_old_plane_state->degamma_tf;
nv_plane_state->degamma_lut = nv_old_plane_state->degamma_lut;
if (nv_plane_state->degamma_lut) {
nv_drm_property_blob_get(nv_plane_state->degamma_lut);
drm_property_blob_get(nv_plane_state->degamma_lut);
}
nv_plane_state->degamma_multiplier = nv_old_plane_state->degamma_multiplier;
nv_plane_state->degamma_changed = false;
@@ -2000,7 +1992,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
nv_plane_state->tmo_lut = nv_old_plane_state->tmo_lut;
if (nv_plane_state->tmo_lut) {
nv_drm_property_blob_get(nv_plane_state->tmo_lut);
drm_property_blob_get(nv_plane_state->tmo_lut);
}
nv_plane_state->tmo_changed = false;
nv_plane_state->tmo_drm_lut_surface =
@@ -2013,32 +2005,27 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
}
static inline void __nv_drm_plane_atomic_destroy_state(
struct drm_plane *plane,
struct drm_plane_state *state)
{
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(state);
#if defined(NV_DRM_ATOMIC_HELPER_PLANE_DESTROY_STATE_HAS_PLANE_ARG)
__drm_atomic_helper_plane_destroy_state(plane, state);
#else
__drm_atomic_helper_plane_destroy_state(state);
#endif
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
nv_drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
#endif
nv_drm_property_blob_put(nv_drm_plane_state->lms_ctm);
nv_drm_property_blob_put(nv_drm_plane_state->lms_to_itp_ctm);
nv_drm_property_blob_put(nv_drm_plane_state->itp_to_lms_ctm);
nv_drm_property_blob_put(nv_drm_plane_state->blend_ctm);
drm_property_blob_put(nv_drm_plane_state->lms_ctm);
drm_property_blob_put(nv_drm_plane_state->lms_to_itp_ctm);
drm_property_blob_put(nv_drm_plane_state->itp_to_lms_ctm);
drm_property_blob_put(nv_drm_plane_state->blend_ctm);
nv_drm_property_blob_put(nv_drm_plane_state->degamma_lut);
drm_property_blob_put(nv_drm_plane_state->degamma_lut);
if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
free_drm_lut_surface);
}
nv_drm_property_blob_put(nv_drm_plane_state->tmo_lut);
drm_property_blob_put(nv_drm_plane_state->tmo_lut);
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
free_drm_lut_surface);
@@ -2049,7 +2036,7 @@ static void nv_drm_plane_atomic_destroy_state(
struct drm_plane *plane,
struct drm_plane_state *state)
{
__nv_drm_plane_atomic_destroy_state(plane, state);
__nv_drm_plane_atomic_destroy_state(state);
nv_drm_free(to_nv_drm_plane_state(state));
}
@@ -2063,9 +2050,7 @@ static const struct drm_plane_funcs nv_plane_funcs = {
.atomic_set_property = nv_drm_plane_atomic_set_property,
.atomic_duplicate_state = nv_drm_plane_atomic_duplicate_state,
.atomic_destroy_state = nv_drm_plane_atomic_destroy_state,
#if defined(NV_DRM_UNIVERSAL_PLANE_INIT_HAS_FORMAT_MODIFIERS_ARG)
.format_mod_supported = nv_drm_plane_format_mod_supported,
#endif
};
static const struct drm_plane_helper_funcs nv_plane_helper_funcs = {
@@ -2081,17 +2066,6 @@ static void nv_drm_crtc_destroy(struct drm_crtc *crtc)
nv_drm_free(nv_crtc);
}
static inline void
__nv_drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
struct drm_crtc_state *crtc_state)
{
#if defined(NV_DRM_ATOMIC_HELPER_CRTC_DESTROY_STATE_HAS_CRTC_ARG)
__drm_atomic_helper_crtc_destroy_state(crtc, crtc_state);
#else
__drm_atomic_helper_crtc_destroy_state(crtc_state);
#endif
}
static inline bool nv_drm_crtc_duplicate_req_head_modeset_config(
const struct NvKmsKapiHeadRequestedConfig *old,
struct NvKmsKapiHeadRequestedConfig *new)
@@ -2234,7 +2208,7 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
nv_state->regamma_tf = nv_old_state->regamma_tf;
nv_state->regamma_lut = nv_old_state->regamma_lut;
if (nv_state->regamma_lut) {
nv_drm_property_blob_get(nv_state->regamma_lut);
drm_property_blob_get(nv_state->regamma_lut);
}
nv_state->regamma_divisor = nv_old_state->regamma_divisor;
if (nv_state->regamma_drm_lut_surface) {
@@ -2263,9 +2237,9 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
nv_state->nv_flip = NULL;
}
__nv_drm_atomic_helper_crtc_destroy_state(crtc, &nv_state->base);
__drm_atomic_helper_crtc_destroy_state(&nv_state->base);
nv_drm_property_blob_put(nv_state->regamma_lut);
drm_property_blob_put(nv_state->regamma_lut);
if (nv_state->regamma_drm_lut_surface != NULL) {
kref_put(&nv_state->regamma_drm_lut_surface->base.refcount,
free_drm_lut_surface);
@@ -2291,21 +2265,6 @@ static struct drm_crtc_funcs nv_crtc_funcs = {
#endif
};
/*
* In kernel versions before the addition of
* drm_crtc_state::connectors_changed, connector changes were
* reflected in drm_crtc_state::mode_changed.
*/
static inline bool
nv_drm_crtc_state_connectors_changed(struct drm_crtc_state *crtc_state)
{
#if defined(NV_DRM_CRTC_STATE_HAS_CONNECTORS_CHANGED)
return crtc_state->connectors_changed;
#else
return crtc_state->mode_changed;
#endif
}
static int head_modeset_config_attach_connector(
struct nv_drm_connector *nv_connector,
struct NvKmsKapiHeadModeSetConfig *head_modeset_config)
@@ -2322,7 +2281,6 @@ static int head_modeset_config_attach_connector(
return 0;
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
static int color_mgmt_config_copy_lut(struct NvKmsLutRamps *nvkms_lut,
struct drm_color_lut *drm_lut,
uint64_t lut_len)
@@ -2434,7 +2392,6 @@ static int color_mgmt_config_set_luts(struct nv_drm_crtc_state *nv_crtc_state,
return 0;
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
/**
* nv_drm_crtc_atomic_check() can fail after it has modified
@@ -2466,7 +2423,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
req_config->flags.modeChanged = NV_TRUE;
}
if (nv_drm_crtc_state_connectors_changed(crtc_state)) {
if (crtc_state->connectors_changed) {
struct NvKmsKapiHeadModeSetConfig *config = &req_config->modeSetConfig;
struct drm_connector *connector;
struct drm_connector_state *connector_state;
@@ -2501,13 +2458,11 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
req_config->modeSetConfig.vrrEnabled = crtc_state->vrr_enabled;
#endif
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
if ((ret = color_mgmt_config_set_luts(nv_crtc_state, req_config)) != 0) {
return ret;
}
}
#endif
if (nv_crtc_state->regamma_changed) {
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
@@ -2754,7 +2709,6 @@ static void
__nv_drm_plane_create_rotation_property(struct drm_plane *plane,
NvU16 validLayerRRTransforms)
{
#if defined(NV_DRM_ROTATION_AVAILABLE)
enum NvKmsRotation curRotation;
NvU32 supported_rotations = 0;
struct NvKmsRRParams rrParams = {
@@ -2803,7 +2757,6 @@ __nv_drm_plane_create_rotation_property(struct drm_plane *plane,
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
supported_rotations);
}
#endif
}
static struct drm_plane*
@@ -2813,13 +2766,11 @@ nv_drm_plane_create(struct drm_device *dev,
NvU32 head,
const struct NvKmsKapiDeviceResourcesInfo *pResInfo)
{
#if defined(NV_DRM_UNIVERSAL_PLANE_INIT_HAS_FORMAT_MODIFIERS_ARG)
struct nv_drm_device *nv_dev = to_nv_device(dev);
const NvU64 linear_modifiers[] = {
DRM_FORMAT_MOD_LINEAR,
DRM_FORMAT_MOD_INVALID,
};
#endif
enum NvKmsCompositionBlendingMode defaultCompositionMode;
struct nv_drm_plane *nv_plane = NULL;
struct nv_drm_plane_state *nv_plane_state = NULL;
@@ -2884,16 +2835,10 @@ nv_drm_plane_create(struct drm_device *dev,
(1 << head) : 0,
&nv_plane_funcs,
formats, formats_count,
#if defined(NV_DRM_UNIVERSAL_PLANE_INIT_HAS_FORMAT_MODIFIERS_ARG)
(plane_type == DRM_PLANE_TYPE_CURSOR) ?
linear_modifiers : nv_dev->modifiers,
#endif
plane_type
#if defined(NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG)
, NULL
#endif
);
plane_type,
NULL);
if (ret != 0) {
goto failed_plane_init;
}
@@ -2990,12 +2935,8 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
ret = drm_crtc_init_with_planes(nv_dev->dev,
&nv_crtc->base,
primary_plane, cursor_plane,
&nv_crtc_funcs
#if defined(NV_DRM_CRTC_INIT_WITH_PLANES_HAS_NAME_ARG)
, NULL
#endif
);
&nv_crtc_funcs,
NULL);
if (ret != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
@@ -3011,21 +2952,14 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
nv_drm_crtc_install_properties(&nv_crtc->base);
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
#if defined(NV_DRM_CRTC_ENABLE_COLOR_MGMT_PRESENT)
drm_crtc_enable_color_mgmt(&nv_crtc->base, NVKMS_LUT_ARRAY_SIZE, true,
NVKMS_LUT_ARRAY_SIZE);
#else
drm_helper_crtc_enable_color_mgmt(&nv_crtc->base, NVKMS_LUT_ARRAY_SIZE,
NVKMS_LUT_ARRAY_SIZE);
#endif
ret = drm_mode_crtc_set_gamma_size(&nv_crtc->base, NVKMS_LUT_ARRAY_SIZE);
if (ret != 0) {
NV_DRM_DEV_LOG_WARN(
nv_dev,
"Failed to initialize legacy gamma support for head %u", head);
}
#endif
return &nv_crtc->base;
@@ -3160,7 +3094,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
return -EOPNOTSUPP;
}
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
crtc = drm_crtc_find(dev, filep, params->crtc_id);
if (!crtc) {
return -ENOENT;
}
@@ -3188,7 +3122,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
return -EOPNOTSUPP;
}
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
crtc = drm_crtc_find(dev, filep, params->crtc_id);
if (!crtc) {
return -ENOENT;
}

View File

@@ -25,7 +25,7 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-helper.h"
@@ -349,6 +349,6 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_CRTC_H__ */

View File

@@ -20,7 +20,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE and NV_DRM_DRM_GEM_H_PRESENT */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#include "nvidia-drm-priv.h"
#include "nvidia-drm-drv.h"
@@ -50,21 +50,10 @@
#include <drm/drm_atomic_uapi.h>
#endif
#if defined(NV_DRM_DRM_VBLANK_H_PRESENT)
#include <drm/drm_vblank.h>
#endif
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
#include <drm/drm_file.h>
#endif
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
#if defined(NV_DRM_DRM_IOCTL_H_PRESENT)
#include <drm/drm_ioctl.h>
#endif
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
@@ -103,18 +92,9 @@
#include <drm/drm_probe_helper.h>
#endif
#include <drm/drm_crtc_helper.h>
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
#include <drm/drm_auth.h>
#endif
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#include <drm/drm_atomic_helper.h>
#endif
static int nv_drm_revoke_modeset_permission(struct drm_device *dev,
struct drm_file *filep,
@@ -161,34 +141,25 @@ static char* nv_get_transfer_function_name(
}
};
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
static void nv_drm_output_poll_changed(struct drm_device *dev)
{
struct drm_connector *connector = NULL;
struct drm_mode_config *config = &dev->mode_config;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
drm_connector_list_iter_begin(dev, &conn_iter);
/*
* Here drm_mode_config::mutex has been acquired unconditionally:
*
* - In the non-NV_DRM_CONNECTOR_LIST_ITER_PRESENT case, the mutex must
* be held for the duration of walking over the connectors.
*
* - In the NV_DRM_CONNECTOR_LIST_ITER_PRESENT case, the mutex must be
* held for the duration of a fill_modes() call chain:
* Here drm_mode_config::mutex has been acquired unconditionally. The
* mutex must be held for the duration of a fill_modes() call chain:
* connector->funcs->fill_modes()
* |-> drm_helper_probe_single_connector_modes()
*
* It is easiest to always acquire the mutext for the entire connector
* It is easiest to always acquire the mutex for the entire connector
* loop.
*/
mutex_lock(&config->mutex);
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_for_each_connector_iter(connector, &conn_iter) {
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
@@ -203,39 +174,10 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
}
mutex_unlock(&config->mutex);
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
drm_connector_list_iter_end(&conn_iter);
}
#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */
static struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
const struct drm_mode_fb_cmd2 *cmd
#else
struct drm_mode_fb_cmd2 *cmd
#endif
)
{
struct drm_mode_fb_cmd2 local_cmd;
struct drm_framebuffer *fb;
local_cmd = *cmd;
fb = nv_drm_internal_framebuffer_create(
dev,
file,
&local_cmd);
#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
*cmd = local_cmd;
#endif
return fb;
}
static const struct drm_mode_config_funcs nv_mode_config_funcs = {
.fb_create = nv_drm_framebuffer_create,
@@ -443,8 +385,7 @@ nv_drm_init_mode_config(struct nv_drm_device *nv_dev,
dev->mode_config.async_page_flip = false;
#endif
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) && \
defined(NV_DRM_MODE_CONFIG_HAS_ALLOW_FB_MODIFIERS)
#if defined(NV_DRM_MODE_CONFIG_HAS_ALLOW_FB_MODIFIERS)
/* Allow clients to define framebuffer layouts using DRM format modifiers */
dev->mode_config.allow_fb_modifiers = true;
#endif
@@ -539,8 +480,6 @@ static void nv_drm_enumerate_encoders_and_connectors
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
/*!
* 'NV_DRM_OUT_FENCE_PTR' is an atomic per-plane property that clients can use
* to request an out-fence fd for a particular plane that's being flipped.
@@ -701,7 +640,6 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
return 0;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
/*
* We can't just call drm_kms_helper_hotplug_event directly because
* fbdev_generic may attempt to set a mode from inside the hotplug event
@@ -720,29 +658,22 @@ static void nv_drm_handle_hotplug_event(struct work_struct *work)
drm_kms_helper_hotplug_event(nv_dev->dev);
}
#endif
static int nv_drm_load(struct drm_device *dev, unsigned long flags)
static int nv_drm_dev_load(struct drm_device *dev)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
struct NvKmsKapiDevice *pDevice;
struct NvKmsKapiAllocateDeviceParams allocateDeviceParams;
struct NvKmsKapiDeviceResourcesInfo resInfo;
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
NvU64 kind;
NvU64 gen;
int i;
#endif
int ret;
struct nv_drm_device *nv_dev = to_nv_device(dev);
NV_DRM_DEV_LOG_INFO(nv_dev, "Loading driver");
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return 0;
}
@@ -826,7 +757,6 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
gen = nv_dev->pageKindGeneration;
kind = nv_dev->genericPageKind;
@@ -843,7 +773,6 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
nv_dev->modifiers[i++] = DRM_FORMAT_MOD_LINEAR;
nv_dev->modifiers[i++] = DRM_FORMAT_MOD_INVALID;
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
/* Initialize drm_device::mode_config */
@@ -897,23 +826,17 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
mutex_unlock(&nv_dev->lock);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
return 0;
}
static void __nv_drm_unload(struct drm_device *dev)
static void nv_drm_dev_unload(struct drm_device *dev)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
struct NvKmsKapiDevice *pDevice = NULL;
#endif
struct nv_drm_device *nv_dev = to_nv_device(dev);
NV_DRM_DEV_LOG_INFO(nv_dev, "Unloading driver");
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return;
}
@@ -956,26 +879,8 @@ static void __nv_drm_unload(struct drm_device *dev)
mutex_unlock(&nv_dev->lock);
nvKms->freeDevice(pDevice);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
}
#if defined(NV_DRM_DRIVER_UNLOAD_HAS_INT_RETURN_TYPE)
static int nv_drm_unload(struct drm_device *dev)
{
__nv_drm_unload(dev);
return 0;
}
#else
static void nv_drm_unload(struct drm_device *dev)
{
__nv_drm_unload(dev);
}
#endif
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
static int __nv_drm_master_set(struct drm_device *dev,
struct drm_file *file_priv, bool from_open)
{
@@ -1052,28 +957,15 @@ int nv_drm_reset_input_colorspace(struct drm_device *dev)
}
out:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
drm_atomic_state_put(state);
#else
// In case of success, drm_atomic_commit() takes care to cleanup and free state.
if (ret != 0) {
drm_atomic_state_free(state);
}
#endif
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
return ret;
}
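With reference-counted atomic state (the only case this change keeps), the caller-side pattern is uniform: the creator drops its own reference with drm_atomic_state_put() whether or not the commit succeeded, because a successful commit takes its own references. A minimal sketch under that assumption:

static int example_atomic_commit(struct drm_device *dev,
                                 struct drm_modeset_acquire_ctx *ctx)
{
    struct drm_atomic_state *state = drm_atomic_state_alloc(dev);
    int ret;

    if (!state)
        return -ENOMEM;

    state->acquire_ctx = ctx;

    /* ... add and modify CRTC/plane/connector states here ... */

    ret = drm_atomic_commit(state);

    /* Drop the creator's reference unconditionally. */
    drm_atomic_state_put(state);
    return ret;
}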
#if defined(NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG)
static
void nv_drm_master_drop(struct drm_device *dev,
struct drm_file *file_priv, bool from_release)
#else
static
void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
#endif
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
@@ -1116,29 +1008,6 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
}
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#if defined(NV_DRM_BUS_PRESENT) || defined(NV_DRM_DRIVER_HAS_SET_BUSID)
static int nv_drm_pci_set_busid(struct drm_device *dev,
struct drm_master *master)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
master->unique = nv_drm_asprintf("pci:%04x:%02x:%02x.%d",
nv_dev->gpu_info.pci_info.domain,
nv_dev->gpu_info.pci_info.bus,
nv_dev->gpu_info.pci_info.slot,
nv_dev->gpu_info.pci_info.function);
if (master->unique == NULL) {
return -ENOMEM;
}
master->unique_len = strlen(master->unique);
return 0;
}
#endif
static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
@@ -1160,7 +1029,6 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
params->supports_sync_fd = false;
params->supports_semsurf = false;
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
/* Memory allocation and semaphore surfaces are only supported
* if the modeset = 1 parameter is set */
if (nv_dev->pDevice != NULL) {
@@ -1171,12 +1039,9 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
if (nv_dev->semsurf_stride != 0) {
params->supports_semsurf = true;
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
params->supports_sync_fd = true;
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
}
}
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
return 0;
}
@@ -1228,7 +1093,6 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
return 0;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
NvU32 dpyId)
{
@@ -1250,10 +1114,10 @@ static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
return -EOPNOTSUPP;
}
// Importantly, drm_connector_lookup (with filep) will only return the
// Importantly, drm_connector_lookup will only return the
// connector if we are master, a lessee with the connector, or not master at
// all. It will return NULL if we are a lessee with other connectors.
connector = nv_drm_connector_lookup(dev, filep, params->connectorId);
connector = drm_connector_lookup(dev, filep, params->connectorId);
if (!connector) {
return -EINVAL;
@@ -1273,7 +1137,7 @@ static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
done:
nv_drm_connector_put(connector);
drm_connector_put(connector);
return ret;
}
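The lookup/put discipline used above is worth spelling out: drm_connector_lookup() returns a referenced connector (subject to the lease checks noted in the comment), and every successful lookup must be balanced by drm_connector_put(). A minimal sketch:

static int example_use_connector(struct drm_device *dev,
                                 struct drm_file *filep, u32 id)
{
    struct drm_connector *connector = drm_connector_lookup(dev, filep, id);

    if (!connector)
        return -EINVAL;

    /* ... use the connector ... */

    drm_connector_put(connector);
    return 0;
}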
@@ -1284,27 +1148,21 @@ static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
struct drm_connector *connector;
int ret = -EINVAL;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -EOPNOTSUPP;
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
/* Lookup for existing connector with same dpyId */
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
params->connectorId = connector->base.id;
ret = 0;
break;
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
drm_connector_list_iter_end(&conn_iter);
return ret;
}
@@ -1337,9 +1195,7 @@ static int nv_drm_grant_modeset_permission(struct drm_device *dev,
struct drm_crtc *crtc;
NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
int ret = 0;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
struct drm_modeset_acquire_ctx ctx;
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
@@ -1349,19 +1205,15 @@ static int nv_drm_grant_modeset_permission(struct drm_device *dev,
#endif
/* Get the connector for the dpyId. */
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
target_connector =
nv_drm_connector_lookup(dev, filep, connector->base.id);
drm_connector_lookup(dev, filep, connector->base.id);
break;
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
drm_connector_list_iter_end(&conn_iter);
// Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
// return the object if we are master, a lessee with the object, or not
@@ -1384,7 +1236,7 @@ static int nv_drm_grant_modeset_permission(struct drm_device *dev,
freeHeadBits = 0;
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
if (drm_crtc_find(dev, filep, crtc->base.id) &&
!nv_crtc->modeset_permission_filep &&
(drm_crtc_mask(crtc) & possible_crtcs)) {
freeHeadBits |= NVBIT(nv_crtc->head);
@@ -1397,15 +1249,11 @@ static int nv_drm_grant_modeset_permission(struct drm_device *dev,
freeHeadBits = targetHeadBit;
} else {
/* Otherwise, remove heads that are in use by other connectors. */
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
drm_connector_list_iter_end(&conn_iter);
}
/* Fail if no heads are available. */
@@ -1439,7 +1287,7 @@ static int nv_drm_grant_modeset_permission(struct drm_device *dev,
done:
if (target_connector) {
nv_drm_connector_put(target_connector);
drm_connector_put(target_connector);
}
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
@@ -1554,9 +1402,7 @@ static int nv_drm_revoke_modeset_permission(struct drm_device *dev,
struct drm_connector *connector;
struct drm_crtc *crtc;
int ret = 0;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
struct drm_modeset_acquire_ctx ctx;
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
@@ -1578,10 +1424,8 @@ static int nv_drm_revoke_modeset_permission(struct drm_device *dev,
* If dpyId is set, only revoke those specific resources. Otherwise,
* it is from closing the file so revoke all resources for that filep.
*/
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
if (nv_connector->modeset_permission_filep == filep &&
(!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
@@ -1594,9 +1438,7 @@ static int nv_drm_revoke_modeset_permission(struct drm_device *dev,
nv_drm_connector_revoke_permissions(dev, nv_connector);
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
drm_connector_list_iter_end(&conn_iter);
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
@@ -1607,22 +1449,7 @@ static int nv_drm_revoke_modeset_permission(struct drm_device *dev,
ret = drm_atomic_commit(state);
done:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
drm_atomic_state_put(state);
#else
if (ret != 0) {
drm_atomic_state_free(state);
} else {
/*
* In case of success, drm_atomic_commit() takes care to cleanup and
* free @state.
*
* Comment placed above drm_atomic_commit() says: The caller must not
* free or in any other way access @state. If the function fails then
* the caller must clean up @state itself.
*/
}
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
@@ -1703,7 +1530,6 @@ static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
nv_drm_revoke_modeset_permission(dev, filep, 0);
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
{
@@ -1716,7 +1542,6 @@ static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
return 0;
}
#if defined(NV_DRM_MASTER_HAS_LEASES)
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
int lessee_id)
{
@@ -1812,9 +1637,7 @@ static void nv_drm_finish_revoking_objects(struct drm_device *dev,
{
struct drm_connector *connector;
struct drm_crtc *crtc;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
int ret = 0;
struct drm_modeset_acquire_ctx ctx;
@@ -1824,19 +1647,15 @@ static void nv_drm_finish_revoking_objects(struct drm_device *dev,
mutex_lock(&dev->mode_config.mutex);
#endif
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif
nv_drm_for_each_connector(connector, &conn_iter, dev) {
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
if (nv_connector->modeset_permission_filep &&
nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
nv_drm_connector_revoke_permissions(dev, nv_connector);
}
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
drm_connector_list_iter_end(&conn_iter);
nv_drm_for_each_crtc(crtc, dev) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
@@ -1852,38 +1671,6 @@ static void nv_drm_finish_revoking_objects(struct drm_device *dev,
mutex_unlock(&dev->mode_config.mutex);
#endif
}
#endif /* NV_DRM_MASTER_HAS_LEASES */
#if defined(NV_DRM_BUS_PRESENT)
#if defined(NV_DRM_BUS_HAS_GET_IRQ)
static int nv_drm_bus_get_irq(struct drm_device *dev)
{
return 0;
}
#endif
#if defined(NV_DRM_BUS_HAS_GET_NAME)
static const char *nv_drm_bus_get_name(struct drm_device *dev)
{
return "nvidia-drm";
}
#endif
static struct drm_bus nv_drm_bus = {
#if defined(NV_DRM_BUS_HAS_BUS_TYPE)
.bus_type = DRIVER_BUS_PCI,
#endif
#if defined(NV_DRM_BUS_HAS_GET_IRQ)
.get_irq = nv_drm_bus_get_irq,
#endif
#if defined(NV_DRM_BUS_HAS_GET_NAME)
.get_name = nv_drm_bus_get_name,
#endif
.set_busid = nv_drm_pci_set_busid,
};
#endif /* NV_DRM_BUS_PRESENT */
/*
* Wrapper around drm_ioctl to hook in to upstream ioctl.
@@ -1894,7 +1681,6 @@ static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
long retcode;
#if defined(NV_DRM_MASTER_HAS_LEASES)
struct drm_file *file_priv = filp->private_data;
struct drm_device *dev = file_priv->minor->dev;
int *objects = NULL;
@@ -1905,11 +1691,9 @@ static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
&objects_count);
}
#endif
retcode = drm_ioctl(filp, cmd, arg);
#if defined(NV_DRM_MASTER_HAS_LEASES)
if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
if (retcode == 0) {
// If revoking was successful, finish revoking the objects.
@@ -1918,7 +1702,6 @@ static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
nv_drm_free(objects);
}
#endif
return retcode;
}
@@ -1933,9 +1716,7 @@ static const struct file_operations nv_drm_fops = {
.compat_ioctl = drm_compat_ioctl,
#endif
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
.mmap = nv_drm_mmap,
#endif
.poll = drm_poll,
.read = drm_read,
@@ -1948,12 +1729,9 @@ static const struct file_operations nv_drm_fops = {
};
static const struct drm_ioctl_desc nv_drm_ioctls[] = {
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IMPORT_NVKMS_MEMORY,
nv_drm_gem_import_nvkms_memory_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IMPORT_USERSPACE_MEMORY,
nv_drm_gem_import_userspace_memory_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
@@ -1967,7 +1745,6 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
nv_drm_get_drm_file_unique_id_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#if defined(NV_DRM_FENCE_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
nv_drm_fence_supported_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
@@ -1989,7 +1766,6 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(NVIDIA_SEMSURF_FENCE_ATTACH,
nv_drm_semsurf_fence_attach_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#endif
/*
* DRM_UNLOCKED is implicit for all non-legacy DRM driver IOCTLs since Linux
@@ -2006,7 +1782,6 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
nv_drm_get_client_capability_ioctl,
0),
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CRTC_CRC32,
nv_drm_get_crtc_crc32_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
@@ -2040,7 +1815,6 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
nv_drm_revoke_permission_ioctl,
DRM_UNLOCKED|DRM_MASTER),
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
};
static struct drm_driver nv_drm_driver = {
@@ -2114,23 +1888,11 @@ static struct drm_driver nv_drm_driver = {
.gem_prime_res_obj = nv_drm_gem_prime_res_obj,
#endif
#if defined(NV_DRM_DRIVER_HAS_SET_BUSID)
.set_busid = nv_drm_pci_set_busid,
#endif
.load = nv_drm_load,
.unload = nv_drm_unload,
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
.postclose = nv_drm_postclose,
#endif
.open = nv_drm_open,
.fops = &nv_drm_fops,
#if defined(NV_DRM_BUS_PRESENT)
.bus = &nv_drm_bus,
#endif
.name = "nvidia-drm",
.desc = "NVIDIA DRM driver",
@@ -2139,9 +1901,7 @@ static struct drm_driver nv_drm_driver = {
.date = "20160202",
#endif
#if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
.device_list = LIST_HEAD_INIT(nv_drm_driver.device_list),
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
#if defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
#endif
// XXX implement nvidia-drm's own .fbdev_probe callback that uses NVKMS kapi directly
@@ -2160,8 +1920,6 @@ static struct drm_driver nv_drm_driver = {
*/
void nv_drm_update_drm_driver_features(void)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
if (!nv_drm_modeset_module_param) {
return;
}
@@ -2176,7 +1934,6 @@ void nv_drm_update_drm_driver_features(void)
#if defined(NV_DRM_DRIVER_HAS_DUMB_DESTROY)
nv_drm_driver.dumb_destroy = nv_drm_dumb_destroy;
#endif /* NV_DRM_DRIVER_HAS_DUMB_DESTROY */
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
}
@@ -2208,9 +1965,7 @@ void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
nv_dev->gpu_info = gpu_info->gpuInfo;
nv_dev->gpu_mig_device = gpu_info->migDevice;
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
mutex_init(&nv_dev->lock);
#endif
/* Allocate DRM device */
@@ -2237,6 +1992,12 @@ void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
}
#endif
/* Load DRM device before registering it */
if (nv_drm_dev_load(dev) != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to load device");
goto failed_drm_load;
}
/* Register DRM device to DRM sub-system */
if (drm_dev_register(dev, 0) != 0) {
@@ -2304,7 +2065,11 @@ void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
failed_drm_register:
nv_drm_dev_free(dev);
nv_drm_dev_unload(dev);
failed_drm_load:
drm_dev_put(dev);
failed_drm_alloc:
@@ -2317,45 +2082,18 @@ failed_drm_alloc:
#if defined(NV_LINUX)
int nv_drm_probe_devices(void)
{
struct NvKmsKapiGpuInfo *gpu_info = NULL;
NvU32 gpu_count = 0;
NvU32 i;
int ret = 0;
NvU32 gpu_count;
nv_drm_update_drm_driver_features();
/* Enumerate NVIDIA GPUs */
gpu_info = nv_drm_calloc(NV_MAX_GPUS, sizeof(*gpu_info));
if (gpu_info == NULL) {
ret = -ENOMEM;
NV_DRM_LOG_ERR("Failed to allocate gpu ids arrays");
goto done;
}
gpu_count = nvKms->enumerateGpus(gpu_info);
/* Register DRM device for each NVIDIA GPU available via NVKMS. */
gpu_count = nvKms->enumerateGpus(nv_drm_register_drm_device);
if (gpu_count == 0) {
NV_DRM_LOG_INFO("Not found NVIDIA GPUs");
goto done;
NV_DRM_LOG_INFO("No NVIDIA GPUs found");
}
WARN_ON(gpu_count > NV_MAX_GPUS);
/* Register DRM device for each NVIDIA GPU */
for (i = 0; i < gpu_count; i++) {
nv_drm_register_drm_device(&gpu_info[i]);
}
done:
nv_drm_free(gpu_info);
return ret;
return 0;
}
#endif
@@ -2369,7 +2107,8 @@ void nv_drm_remove_devices(void)
struct drm_device *dev = dev_list->dev;
drm_dev_unregister(dev);
nv_drm_dev_free(dev);
nv_drm_dev_unload(dev);
drm_dev_put(dev);
nv_drm_free(dev_list);
@@ -2420,7 +2159,6 @@ void nv_drm_suspend_resume(NvBool suspend)
}
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
nv_dev = dev_list;
/*
@@ -2446,7 +2184,6 @@ void nv_drm_suspend_resume(NvBool suspend)
drm_kms_helper_poll_enable(dev);
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
done:
mutex_unlock(&nv_drm_suspend_mutex);

View File

@@ -20,9 +20,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-encoder.h"
@@ -139,12 +139,8 @@ nv_drm_encoder_new(struct drm_device *dev,
ret = drm_encoder_init(dev,
&nv_encoder->base, &nv_encoder_funcs,
nvkms_connector_signal_to_drm_encoder_signal(format)
#if defined(NV_DRM_ENCODER_INIT_HAS_NAME_ARG)
, NULL
#endif
);
nvkms_connector_signal_to_drm_encoder_signal(format),
NULL);
if (ret != 0) {
nv_drm_free(nv_encoder);
@@ -336,17 +332,6 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
return;
}
/*
* On some kernels, DRM has the notion of a "primary group" that
* tracks the global mode setting state for the device.
*
* On kernels where DRM has a primary group, we need to reinitialize
* after adding encoders and connectors.
*/
#if defined(NV_DRM_REINIT_PRIMARY_MODE_GROUP_PRESENT)
drm_reinit_primary_mode_group(dev);
#endif
schedule_delayed_work(&nv_dev->hotplug_event_work, 0);
}
#endif

View File

@@ -25,15 +25,11 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#if defined(NV_DRM_DRM_ENCODER_H_PRESENT)
#include <drm/drm_encoder.h>
#else
#include <drm/drmP.h>
#endif
#include "nvkms-kapi.h"
@@ -63,6 +59,6 @@ void nv_drm_handle_display_change(struct nv_drm_device *nv_dev,
void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
NvKmsKapiDisplay hDisplay);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_ENCODER_H__ */

View File

@@ -20,9 +20,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
@@ -41,7 +41,7 @@ static void __nv_drm_framebuffer_free(struct nv_drm_framebuffer *nv_fb)
/* Unreference gem object */
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
struct drm_gem_object *gem = fb->obj[i];
if (gem != NULL) {
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(gem);
nv_drm_gem_object_unreference_unlocked(nv_gem);
@@ -73,7 +73,7 @@ nv_drm_framebuffer_create_handle(struct drm_framebuffer *fb,
struct drm_file *file, unsigned int *handle)
{
return nv_drm_gem_handle_create(file,
to_nv_gem_object(nv_fb_get_gem_obj(fb, 0)),
to_nv_gem_object(fb->obj[0]),
handle);
}
@@ -83,11 +83,10 @@ static struct drm_framebuffer_funcs nv_framebuffer_funcs = {
};
static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
struct drm_device *dev,
struct nv_drm_device *nv_dev,
struct drm_file *file,
struct drm_mode_fb_cmd2 *cmd)
const struct drm_mode_fb_cmd2 *cmd)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct nv_drm_framebuffer *nv_fb;
struct nv_drm_gem_object *nv_gem;
const int num_planes = nv_drm_format_num_planes(cmd->pixel_format);
@@ -109,7 +108,7 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
}
for (i = 0; i < num_planes; i++) {
nv_gem = nv_drm_gem_object_lookup(dev, file, cmd->handles[i]);
nv_gem = nv_drm_gem_object_lookup(file, cmd->handles[i]);
if (nv_gem == NULL) {
NV_DRM_DEV_DEBUG_DRIVER(
@@ -118,7 +117,7 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
goto failed;
}
nv_fb_set_gem_obj(&nv_fb->base, i, &nv_gem->base);
nv_fb->base.obj[i] = &nv_gem->base;
}
return nv_fb;
@@ -154,27 +153,13 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
}
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
struct drm_gem_object *gem = fb->obj[i];
if (gem != NULL) {
nv_gem = to_nv_gem_object(gem);
params.planes[i].memory = nv_gem->pMemory;
params.planes[i].offset = fb->offsets[i];
params.planes[i].pitch = fb->pitches[i];
/*
* XXX Use drm_framebuffer_funcs.dirty and
* drm_fb_helper_funcs.fb_dirty instead
*
* Currently using noDisplayCaching when registering surfaces with
* NVKMS that are using memory allocated through the DRM
* Dumb-Buffers API. This prevents Display Idle Frame Rate from
* kicking in and preventing CPU updates to the surface memory from
* not being reflected on the display. Ideally, DIFR would be
* dynamically disabled whenever a user of the memory blits to the
* frontbuffer. DRM provides the needed callbacks to achieve this.
*/
params.noDisplayCaching |= !!nv_gem->is_drm_dumb;
}
}
params.height = fb->height;
@@ -254,19 +239,17 @@ fail:
return -EINVAL;
}
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
struct drm_mode_fb_cmd2 *cmd)
const struct drm_mode_fb_cmd2 *cmd)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct nv_drm_framebuffer *nv_fb;
uint64_t modifier = 0;
int ret;
enum NvKmsSurfaceMemoryFormat format;
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
int i;
#endif
bool have_modifier = false;
/* Check whether NvKms supports the given pixel format */
@@ -277,7 +260,6 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
return ERR_PTR(-EINVAL);
}
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
if (cmd->flags & DRM_MODE_FB_MODIFIERS) {
have_modifier = true;
modifier = cmd->modifier[0];
@@ -296,9 +278,8 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
return ERR_PTR(-EINVAL);
}
}
#endif
nv_fb = nv_drm_framebuffer_alloc(dev, file, cmd);
nv_fb = nv_drm_framebuffer_alloc(nv_dev, file, cmd);
if (IS_ERR(nv_fb)) {
return (struct drm_framebuffer *)nv_fb;
}
@@ -306,9 +287,7 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
/* Fill out framebuffer metadata from the userspace fb creation request */
drm_helper_mode_fill_fb_struct(
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_DEV_ARG)
dev,
#endif
&nv_fb->base,
cmd);

View File

@@ -25,27 +25,19 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_FRAMEBUFFER_H_PRESENT)
#include <drm/drm_framebuffer.h>
#endif
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvkms-kapi.h"
struct nv_drm_framebuffer {
struct NvKmsKapiSurface *pSurface;
#if !defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
struct drm_gem_object*
obj[NVKMS_MAX_PLANES_PER_SURFACE];
#endif
struct drm_framebuffer base;
};
@@ -58,34 +50,11 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer(
return container_of(fb, struct nv_drm_framebuffer, base);
}
static inline struct drm_gem_object *nv_fb_get_gem_obj(
struct drm_framebuffer *fb,
uint32_t plane)
{
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
return fb->obj[plane];
#else
return to_nv_framebuffer(fb)->obj[plane];
#endif
}
static inline void nv_fb_set_gem_obj(
struct drm_framebuffer *fb,
uint32_t plane,
struct drm_gem_object *obj)
{
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
fb->obj[plane] = obj;
#else
to_nv_framebuffer(fb)->obj[plane] = obj;
#endif
}
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
struct drm_mode_fb_cmd2 *cmd);
const struct drm_mode_fb_cmd2 *cmd);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_FB_H__ */

View File

@@ -34,9 +34,7 @@
#include "nvidia-drm-fence.h"
#include "nvidia-dma-resv-helper.h"
#if defined(NV_DRM_FENCE_AVAILABLE)
#include "nvidia-dma-fence-helper.h"
#include <linux/dma-fence.h>
#define NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS 5000
@@ -83,42 +81,42 @@ struct nv_drm_prime_fence_context {
struct nv_drm_prime_fence {
struct list_head list_entry;
nv_dma_fence_t base;
struct dma_fence base;
spinlock_t lock;
};
static inline
struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
struct nv_drm_prime_fence *to_nv_drm_prime_fence(struct dma_fence *fence)
{
return container_of(fence, struct nv_drm_prime_fence, base);
}
static const char*
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
nv_drm_gem_fence_op_get_driver_name(struct dma_fence *fence)
{
return "NVIDIA";
}
static const char*
nv_drm_gem_prime_fence_op_get_timeline_name(nv_dma_fence_t *fence)
nv_drm_gem_prime_fence_op_get_timeline_name(struct dma_fence *fence)
{
return "nvidia.prime";
}
static bool nv_drm_gem_prime_fence_op_enable_signaling(nv_dma_fence_t *fence)
static bool nv_drm_gem_prime_fence_op_enable_signaling(struct dma_fence *fence)
{
// DO NOTHING
return true;
}
static void nv_drm_gem_prime_fence_op_release(nv_dma_fence_t *fence)
static void nv_drm_gem_prime_fence_op_release(struct dma_fence *fence)
{
struct nv_drm_prime_fence *nv_fence = to_nv_drm_prime_fence(fence);
nv_drm_free(nv_fence);
}
static signed long
nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
nv_drm_gem_prime_fence_op_wait(struct dma_fence *fence,
bool intr, signed long timeout)
{
/*
@@ -131,12 +129,12 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
* that it should never get hit during normal operation, but not so long
* that the system becomes unresponsive.
*/
return nv_dma_fence_default_wait(fence, intr,
return dma_fence_default_wait(fence, intr,
(timeout == MAX_SCHEDULE_TIMEOUT) ?
msecs_to_jiffies(96) : timeout);
}
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
static const struct dma_fence_ops nv_drm_gem_prime_fence_ops = {
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
@@ -148,8 +146,8 @@ static inline void
__nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
{
list_del(&nv_fence->list_entry);
nv_dma_fence_signal(&nv_fence->base);
nv_dma_fence_put(&nv_fence->base);
dma_fence_signal(&nv_fence->base);
dma_fence_put(&nv_fence->base);
}
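The two calls above are the tail end of the dma_fence reference flow used throughout this file: one reference belongs to the fence's creator, and an extra reference is held while the fence sits on the pending list. A minimal sketch of that flow (list locking omitted):

static void example_track_and_signal(struct dma_fence *fence,
                                     struct list_head *pending,
                                     struct list_head *entry)
{
    /* dma_fence_init() left refcount == 1 for the creator; take one more
     * reference on behalf of the pending list. */
    dma_fence_get(fence);
    list_add_tail(entry, pending);

    /* ... later, once the corresponding semaphore value is reached ... */
    list_del(entry);
    dma_fence_signal(fence);
    dma_fence_put(fence);   /* drop the pending list's reference */
}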
static void nv_drm_gem_prime_force_fence_signal(
@@ -289,13 +287,13 @@ __nv_drm_prime_fence_context_new(
}
/*
* nv_dma_fence_context_alloc() cannot fail, so we do not need
* dma_fence_context_alloc() cannot fail, so we do not need
* to check a return value.
*/
nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
nv_prime_fence_context->base.nv_dev = nv_dev;
nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
nv_prime_fence_context->base.context = dma_fence_context_alloc(1);
nv_prime_fence_context->base.fenceSemIndex = p->index;
nv_prime_fence_context->pSemSurface = pSemSurface;
nv_prime_fence_context->pLinearAddress = pLinearAddress;
@@ -343,7 +341,7 @@ failed:
return NULL;
}
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
static struct dma_fence *__nv_drm_prime_fence_context_create_fence(
struct nv_drm_prime_fence_context *nv_prime_fence_context,
unsigned int seqno)
{
@@ -369,12 +367,12 @@ static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
spin_lock_init(&nv_fence->lock);
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
&nv_fence->lock, nv_prime_fence_context->base.context,
seqno);
/* The context maintains a reference to any pending fences. */
nv_dma_fence_get(&nv_fence->base);
dma_fence_get(&nv_fence->base);
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
@@ -424,12 +422,11 @@ const struct nv_drm_gem_object_funcs nv_fence_context_gem_ops = {
static inline
struct nv_drm_fence_context *
__nv_drm_fence_context_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
{
struct nv_drm_gem_object *nv_gem =
nv_drm_gem_object_lookup(dev, filp, handle);
nv_drm_gem_object_lookup(filp, handle);
if (nv_gem != NULL && nv_gem->ops != &nv_fence_context_gem_ops) {
nv_drm_gem_object_unreference_unlocked(nv_gem);
@@ -491,7 +488,7 @@ done:
}
static int __nv_drm_gem_attach_fence(struct nv_drm_gem_object *nv_gem,
nv_dma_fence_t *fence,
struct dma_fence *fence,
bool shared)
{
nv_dma_resv_t *resv = nv_drm_gem_res_obj(nv_gem);
@@ -524,7 +521,7 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
struct nv_drm_gem_object *nv_gem;
struct nv_drm_fence_context *nv_fence_context;
nv_dma_fence_t *fence;
struct dma_fence *fence;
if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
@@ -536,7 +533,7 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
goto done;
}
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
nv_gem = nv_drm_gem_object_lookup(filep, p->handle);
if (!nv_gem) {
NV_DRM_DEV_LOG_ERR(
@@ -548,7 +545,6 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
}
if((nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle)) == NULL) {
@@ -587,7 +583,7 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
ret = __nv_drm_gem_attach_fence(nv_gem, fence, true /* exclusive */);
nv_dma_fence_put(fence);
dma_fence_put(fence);
fence_context_create_fence_failed:
nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
@@ -600,7 +596,7 @@ done:
}
struct nv_drm_semsurf_fence {
nv_dma_fence_t base;
struct dma_fence base;
spinlock_t lock;
/*
@@ -628,7 +624,7 @@ struct nv_drm_semsurf_fence_callback {
};
struct nv_drm_sync_fd_wait_data {
nv_dma_fence_cb_t dma_fence_cb;
struct dma_fence_cb dma_fence_cb;
struct nv_drm_semsurf_fence_ctx *ctx;
nv_drm_work work; /* Deferred second half of fence wait callback */
@@ -759,15 +755,15 @@ __nv_drm_semsurf_force_complete_pending(struct nv_drm_semsurf_fence_ctx *ctx)
&ctx->pending_fences,
typeof(*nv_fence),
pending_node);
nv_dma_fence_t *fence = &nv_fence->base;
struct dma_fence *fence = &nv_fence->base;
list_del(&nv_fence->pending_node);
nv_dma_fence_set_error(fence, -ETIMEDOUT);
nv_dma_fence_signal(fence);
dma_fence_set_error(fence, -ETIMEDOUT);
dma_fence_signal(fence);
/* Remove the pending list's reference */
nv_dma_fence_put(fence);
dma_fence_put(fence);
}
/*
@@ -824,7 +820,7 @@ __nv_drm_semsurf_ctx_process_completed(struct nv_drm_semsurf_fence_ctx *ctx,
struct list_head finished;
struct list_head timed_out;
struct nv_drm_semsurf_fence *nv_fence;
nv_dma_fence_t *fence;
struct dma_fence *fence;
NvU64 currentSeqno = __nv_drm_get_semsurf_ctx_seqno(ctx);
NvU64 fenceSeqno = 0;
unsigned long flags;
@@ -888,8 +884,8 @@ __nv_drm_semsurf_ctx_process_completed(struct nv_drm_semsurf_fence_ctx *ctx,
nv_fence = list_first_entry(&finished, typeof(*nv_fence), pending_node);
list_del_init(&nv_fence->pending_node);
fence = &nv_fence->base;
nv_dma_fence_signal(fence);
nv_dma_fence_put(fence); /* Drops the pending list's reference */
dma_fence_signal(fence);
dma_fence_put(fence); /* Drops the pending list's reference */
}
while (!list_empty(&timed_out)) {
@@ -897,9 +893,9 @@ __nv_drm_semsurf_ctx_process_completed(struct nv_drm_semsurf_fence_ctx *ctx,
pending_node);
list_del_init(&nv_fence->pending_node);
fence = &nv_fence->base;
nv_dma_fence_set_error(fence, -ETIMEDOUT);
nv_dma_fence_signal(fence);
nv_dma_fence_put(fence); /* Drops the pending list's reference */
dma_fence_set_error(fence, -ETIMEDOUT);
dma_fence_signal(fence);
dma_fence_put(fence); /* Drops the pending list's reference */
}
}
@@ -1265,13 +1261,13 @@ __nv_drm_semsurf_fence_ctx_new(
}
/*
* nv_dma_fence_context_alloc() cannot fail, so we do not need
* dma_fence_context_alloc() cannot fail, so we do not need
* to check a return value.
*/
ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
ctx->base.nv_dev = nv_dev;
ctx->base.context = nv_dma_fence_context_alloc(1);
ctx->base.context = dma_fence_context_alloc(1);
ctx->base.fenceSemIndex = p->index;
ctx->pSemSurface = pSemSurface;
ctx->pSemMapping.pVoid = semMapping;
@@ -1343,26 +1339,26 @@ int nv_drm_semsurf_fence_ctx_create_ioctl(struct drm_device *dev,
}
static inline struct nv_drm_semsurf_fence*
to_nv_drm_semsurf_fence(nv_dma_fence_t *fence)
to_nv_drm_semsurf_fence(struct dma_fence *fence)
{
return container_of(fence, struct nv_drm_semsurf_fence, base);
}
static const char*
__nv_drm_semsurf_fence_op_get_timeline_name(nv_dma_fence_t *fence)
__nv_drm_semsurf_fence_op_get_timeline_name(struct dma_fence *fence)
{
return "nvidia.semaphore_surface";
}
static bool
__nv_drm_semsurf_fence_op_enable_signaling(nv_dma_fence_t *fence)
__nv_drm_semsurf_fence_op_enable_signaling(struct dma_fence *fence)
{
// DO NOTHING - Could defer RM callback registration until this point
return true;
}
static void
__nv_drm_semsurf_fence_op_release(nv_dma_fence_t *fence)
__nv_drm_semsurf_fence_op_release(struct dma_fence *fence)
{
struct nv_drm_semsurf_fence *nv_fence =
to_nv_drm_semsurf_fence(fence);
@@ -1370,12 +1366,12 @@ __nv_drm_semsurf_fence_op_release(nv_dma_fence_t *fence)
nv_drm_free(nv_fence);
}
static const nv_dma_fence_ops_t nv_drm_semsurf_fence_ops = {
static const struct dma_fence_ops nv_drm_semsurf_fence_ops = {
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
.get_timeline_name = __nv_drm_semsurf_fence_op_get_timeline_name,
.enable_signaling = __nv_drm_semsurf_fence_op_enable_signaling,
.release = __nv_drm_semsurf_fence_op_release,
.wait = nv_dma_fence_default_wait,
.wait = dma_fence_default_wait,
#if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
.use_64bit_seqno = true,
#endif
@@ -1401,7 +1397,7 @@ __nv_drm_semsurf_ctx_add_pending(struct nv_drm_semsurf_fence_ctx *ctx,
}
/* Add a reference to the fence for the list */
nv_dma_fence_get(&nv_fence->base);
dma_fence_get(&nv_fence->base);
INIT_LIST_HEAD(&nv_fence->pending_node);
nv_fence->timeout = nv_drm_timeout_from_ms(timeoutMS);
@@ -1434,14 +1430,14 @@ __nv_drm_semsurf_ctx_add_pending(struct nv_drm_semsurf_fence_ctx *ctx,
__nv_drm_semsurf_ctx_reg_callbacks(ctx);
}
static nv_dma_fence_t *__nv_drm_semsurf_fence_ctx_create_fence(
static struct dma_fence *__nv_drm_semsurf_fence_ctx_create_fence(
struct nv_drm_device *nv_dev,
struct nv_drm_semsurf_fence_ctx *ctx,
NvU64 wait_value,
NvU64 timeout_value_ms)
{
struct nv_drm_semsurf_fence *nv_fence;
nv_dma_fence_t *fence;
struct dma_fence *fence;
int ret = 0;
if (timeout_value_ms == 0 ||
@@ -1461,7 +1457,7 @@ static nv_dma_fence_t *__nv_drm_semsurf_fence_ctx_create_fence(
#endif
/* Initializes the fence with one reference (for the caller) */
nv_dma_fence_init(fence, &nv_drm_semsurf_fence_ops,
dma_fence_init(fence, &nv_drm_semsurf_fence_ops,
&nv_fence->lock,
ctx->base.context, wait_value);
@@ -1479,7 +1475,7 @@ int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev,
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_semsurf_fence_create_params *p = data;
struct nv_drm_fence_context *nv_fence_context;
nv_dma_fence_t *fence;
struct dma_fence *fence;
int ret = -EINVAL;
int fd;
@@ -1494,7 +1490,6 @@ int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev,
}
if ((nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(
@@ -1550,7 +1545,7 @@ fence_context_create_sync_failed:
* FD will still hold a reference, and the pending list (if the fence hasn't
* already been signaled) will also retain a reference.
*/
nv_dma_fence_put(fence);
dma_fence_put(fence);
fence_context_create_fence_failed:
nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
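The reference counting described in the comment above follows from how a fence is normally exported as a sync-file fd with the standard kernel API. A sketch of that export (not this driver's exact code path; error handling trimmed):

#include <linux/dma-fence.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/sync_file.h>

static int example_fence_to_sync_fd(struct dma_fence *fence)
{
    struct sync_file *sync;
    int fd = get_unused_fd_flags(O_CLOEXEC);

    if (fd < 0)
        return fd;

    sync = sync_file_create(fence);   /* takes its own fence reference */
    if (!sync) {
        put_unused_fd(fd);
        return -ENOMEM;
    }

    fd_install(fd, sync->file);
    dma_fence_put(fence);             /* drop the creator's reference */
    return fd;
}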
@@ -1608,8 +1603,8 @@ __nv_drm_semsurf_wait_fence_work_cb
static void
__nv_drm_semsurf_wait_fence_cb
(
nv_dma_fence_t *fence,
nv_dma_fence_cb_t *cb
struct dma_fence *fence,
struct dma_fence_cb *cb
)
{
struct nv_drm_sync_fd_wait_data *wait_data =
@@ -1634,7 +1629,7 @@ __nv_drm_semsurf_wait_fence_cb
}
/* Don't need to reference the fence anymore, just the fence context. */
nv_dma_fence_put(fence);
dma_fence_put(fence);
}
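The callback above runs from the fence's signaling path, which may be atomic context, so only the embedded dma_fence_cb is touched there and the heavier processing is deferred to a worker. A sketch of that shape, with a hypothetical wait structure and helper:

struct example_wait {
    struct dma_fence_cb cb;
    struct work_struct work;   /* runs the deferred second half */
};

static void example_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
    struct example_wait *wait = container_of(cb, struct example_wait, cb);

    /* Possibly atomic context: just hand off to a worker. */
    schedule_work(&wait->work);
}

/* dma_fence_add_callback() returns -ENOENT if the fence already signaled,
 * in which case the caller runs the deferred path itself:
 *
 *     ret = dma_fence_add_callback(fence, &wait->cb, example_fence_cb);
 *     if (ret == -ENOENT)
 *         example_run_second_half(wait);   // hypothetical helper
 */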
int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
@@ -1646,7 +1641,7 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
struct nv_drm_fence_context *nv_fence_context;
struct nv_drm_semsurf_fence_ctx *ctx;
struct nv_drm_sync_fd_wait_data *wait_data = NULL;
nv_dma_fence_t *fence;
struct dma_fence *fence;
unsigned long flags;
int ret = -EINVAL;
@@ -1663,7 +1658,6 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
}
if ((nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(
@@ -1716,7 +1710,7 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
list_add(&wait_data->pending_node, &ctx->pending_waits);
spin_unlock_irqrestore(&ctx->lock, flags);
ret = nv_dma_fence_add_callback(fence,
ret = dma_fence_add_callback(fence,
&wait_data->dma_fence_cb,
__nv_drm_semsurf_wait_fence_cb);
@@ -1730,7 +1724,7 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
}
/* Execute second half of wait immediately, avoiding the worker thread */
nv_dma_fence_put(fence);
dma_fence_put(fence);
__nv_drm_semsurf_wait_fence_work_cb(wait_data);
}
@@ -1759,7 +1753,7 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
struct drm_nvidia_semsurf_fence_attach_params *p = data;
struct nv_drm_gem_object *nv_gem = NULL;
struct nv_drm_fence_context *nv_fence_context = NULL;
nv_dma_fence_t *fence;
struct dma_fence *fence;
int ret = -EINVAL;
if (nv_dev->pDevice == NULL) {
@@ -1767,7 +1761,7 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
goto done;
}
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
nv_gem = nv_drm_gem_object_lookup(filep, p->handle);
if (!nv_gem) {
NV_DRM_DEV_LOG_ERR(
@@ -1779,7 +1773,6 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
}
nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle);
@@ -1819,7 +1812,7 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
ret = __nv_drm_gem_attach_fence(nv_gem, fence, p->shared);
nv_dma_fence_put(fence);
dma_fence_put(fence);
done:
if (nv_fence_context) {
@@ -1833,6 +1826,4 @@ done:
return ret;
}
#endif /* NV_DRM_FENCE_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */

View File

@@ -30,8 +30,6 @@
struct drm_file;
struct drm_device;
#if defined(NV_DRM_FENCE_AVAILABLE)
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep);
@@ -57,8 +55,6 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep);
#endif /* NV_DRM_FENCE_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_PRIME_FENCE_H__ */

View File

@@ -20,9 +20,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>

View File

@@ -25,7 +25,7 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include <drm/drm_fourcc.h>
@@ -40,6 +40,6 @@ uint32_t *nv_drm_format_array_alloc(
bool nv_drm_format_is_yuv(u32 format);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_FORMAT_H__ */

View File

@@ -24,17 +24,13 @@
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-ioctl.h"
@@ -47,12 +43,10 @@ void __nv_drm_gem_dma_buf_free(struct nv_drm_gem_object *nv_gem)
struct nv_drm_device *nv_dev = nv_gem->nv_dev;
struct nv_drm_gem_dma_buf *nv_dma_buf = to_nv_dma_buf(nv_gem);
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
if (nv_dma_buf->base.pMemory) {
/* Free NvKmsKapiMemory handle associated with this gem object */
nvKms->freeMemory(nv_dev->pDevice, nv_dma_buf->base.pMemory);
}
#endif
drm_prime_gem_destroy(&nv_gem->base, nv_dma_buf->sgt);
@@ -157,13 +151,11 @@ nv_drm_gem_prime_import_sg_table(struct drm_device *dev,
BUG_ON(dma_buf->size % PAGE_SIZE);
pMemory = NULL;
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
if (drm_core_check_feature(dev, DRIVER_MODESET)) {
pMemory = nvKms->getSystemMemoryHandleFromDmaBuf(nv_dev->pDevice,
(NvP64)(NvUPtr)dma_buf,
dma_buf->size - 1);
}
#endif
nv_drm_gem_object_init(nv_dev, &nv_dma_buf->base,
&__nv_gem_dma_buf_ops, dma_buf->size, pMemory);
@@ -194,7 +186,7 @@ int nv_drm_gem_export_dmabuf_memory_ioctl(struct drm_device *dev,
}
if ((nv_dma_buf = nv_drm_gem_object_dma_buf_lookup(
dev, filep, p->handle)) == NULL) {
filep, p->handle)) == NULL) {
ret = -EINVAL;
NV_DRM_DEV_LOG_ERR(
nv_dev,
@@ -203,7 +195,6 @@ int nv_drm_gem_export_dmabuf_memory_ioctl(struct drm_device *dev,
goto done;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
if (drm_core_check_feature(dev, DRIVER_MODESET)) {
if (!nv_dma_buf->base.pMemory) {
/*
@@ -218,7 +209,6 @@ int nv_drm_gem_export_dmabuf_memory_ioctl(struct drm_device *dev,
nv_dma_buf->base.base.size - 1);
}
}
#endif
if (!nv_dma_buf->base.pMemory && !pTmpMemory) {
ret = -ENOMEM;

View File

@@ -48,12 +48,11 @@ static inline struct nv_drm_gem_dma_buf *to_nv_dma_buf(
static inline
struct nv_drm_gem_dma_buf *nv_drm_gem_object_dma_buf_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
{
struct nv_drm_gem_object *nv_gem =
nv_drm_gem_object_lookup(dev, filp, handle);
nv_drm_gem_object_lookup(filp, handle);
if (nv_gem != NULL && nv_gem->ops != &__nv_gem_dma_buf_ops) {
nv_drm_gem_object_unreference_unlocked(nv_gem);

View File

@@ -22,27 +22,20 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
#include <linux/io.h>
#if defined(NV_BSD)
#include <vm/vm_pageout.h>
#endif
#include "nv-mm.h"
static void __nv_drm_gem_nvkms_memory_free(struct nv_drm_gem_object *nv_gem)
{
struct nv_drm_device *nv_dev = nv_gem->nv_dev;
@@ -94,10 +87,9 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
to_nv_nvkms_memory(nv_gem);
unsigned long address = nv_page_fault_va(vmf);
unsigned long address = vmf->address;
struct drm_gem_object *gem = vma->vm_private_data;
unsigned long page_offset, pfn;
vm_fault_t ret;
@@ -146,8 +138,6 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
}
#endif /* defined(NV_VMF_INSERT_PFN_PRESENT) */
return ret;
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
return VM_FAULT_SIGBUS;
}
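For context on the fault handler above: with vmf->address and vmf_insert_pfn() always available, as this change now assumes, a pin-to-pfn fault handler reduces to a small offset computation. A sketch assuming physically contiguous backing memory; example_phys_base() is a hypothetical accessor for that base address.

static vm_fault_t example_gem_vma_fault(struct vm_fault *vmf)
{
    struct vm_area_struct *vma = vmf->vma;
    struct drm_gem_object *gem = vma->vm_private_data;
    unsigned long page_offset, pfn;

    /* Offset of the faulting page within the GEM object's mmap range. */
    page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);

    /* Hypothetical: physical base of the contiguous backing store. */
    pfn = (example_phys_base(gem) >> PAGE_SHIFT) + page_offset;

    return vmf_insert_pfn(vma, vmf->address, pfn);
}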
static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
@@ -340,6 +330,7 @@ int nv_drm_dumb_create(
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory;
uint8_t compressible = 0;
struct NvKmsKapiMemory *pMemory;
struct NvKmsKapiAllocateMemoryParams allocParams = { };
int ret = 0;
args->pitch = roundup(args->width * ((args->bpp + 7) >> 3),
@@ -357,20 +348,14 @@ int nv_drm_dumb_create(
goto fail;
}
if (nv_dev->hasVideoMemory) {
pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
args->size,
&compressible);
} else {
pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
NvKmsSurfaceMemoryLayoutPitch,
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
args->size,
&compressible);
}
allocParams.layout = NvKmsSurfaceMemoryLayoutPitch;
allocParams.type = NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT;
allocParams.size = args->size;
allocParams.noDisplayCaching = true;
allocParams.useVideoMemory = nv_dev->hasVideoMemory;
allocParams.compressible = &compressible;
pMemory = nvKms->allocateMemory(nv_dev->pDevice, &allocParams);
if (pMemory == NULL) {
ret = -ENOMEM;
NV_DRM_DEV_LOG_ERR(
@@ -385,8 +370,6 @@ int nv_drm_dumb_create(
goto nvkms_gem_obj_init_failed;
}
nv_nvkms_memory->base.is_drm_dumb = true;
/* Always map dumb buffer memory up front. Clients are only expected
* to use dumb buffers for software rendering, so they're not much use
* without a CPU mapping.
@@ -482,7 +465,6 @@ int nv_drm_gem_export_nvkms_memory_ioctl(struct drm_device *dev,
}
if ((nv_nvkms_memory = nv_drm_gem_object_nvkms_memory_lookup(
dev,
filep,
p->handle)) == NULL) {
ret = -EINVAL;
@@ -519,8 +501,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
struct drm_nvidia_gem_alloc_nvkms_memory_params *p = data;
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory = NULL;
struct NvKmsKapiMemory *pMemory;
enum NvKmsSurfaceMemoryLayout layout;
enum NvKmsKapiAllocationType type;
struct NvKmsKapiAllocateMemoryParams allocParams = { };
int ret = 0;
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
@@ -540,25 +521,15 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
goto failed;
}
layout = p->block_linear ?
allocParams.layout = p->block_linear ?
NvKmsSurfaceMemoryLayoutBlockLinear : NvKmsSurfaceMemoryLayoutPitch;
type = (p->flags & NV_GEM_ALLOC_NO_SCANOUT) ?
allocParams.type = (p->flags & NV_GEM_ALLOC_NO_SCANOUT) ?
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN : NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT;
allocParams.size = p->memory_size;
allocParams.useVideoMemory = nv_dev->hasVideoMemory;
allocParams.compressible = &p->compressible;
if (nv_dev->hasVideoMemory) {
pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
layout,
type,
p->memory_size,
&p->compressible);
} else {
pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
layout,
type,
p->memory_size,
&p->compressible);
}
pMemory = nvKms->allocateMemory(nv_dev->pDevice, &allocParams);
if (pMemory == NULL) {
ret = -EINVAL;
NV_DRM_DEV_LOG_ERR(nv_dev,
@@ -640,7 +611,6 @@ int nv_drm_dumb_map_offset(struct drm_file *file,
int ret = -EINVAL;
if ((nv_nvkms_memory = nv_drm_gem_object_nvkms_memory_lookup(
dev,
file,
handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(

View File

@@ -25,7 +25,7 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-gem.h"
@@ -72,12 +72,11 @@ static inline struct nv_drm_gem_nvkms_memory *to_nv_nvkms_memory_const(
static inline
struct nv_drm_gem_nvkms_memory *nv_drm_gem_object_nvkms_memory_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
{
struct nv_drm_gem_object *nv_gem =
nv_drm_gem_object_lookup(dev, filp, handle);
nv_drm_gem_object_lookup(filp, handle);
if (nv_gem != NULL && nv_gem->ops != &nv_gem_nvkms_memory_ops) {
nv_drm_gem_object_unreference_unlocked(nv_gem);

View File

@@ -24,9 +24,7 @@
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-helper.h"
@@ -116,11 +114,7 @@ static vm_fault_t __nv_vm_insert_mixed_helper(
{
int ret;
#if defined(NV_PFN_TO_PFN_T_PRESENT)
ret = vm_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
#else
ret = vm_insert_mixed(vma, address, pfn);
#endif
switch (ret) {
case 0:
@@ -145,7 +139,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
struct vm_fault *vmf)
{
struct nv_drm_gem_user_memory *nv_user_memory = to_nv_user_memory(nv_gem);
unsigned long address = nv_page_fault_va(vmf);
unsigned long address = vmf->address;
struct drm_gem_object *gem = vma->vm_private_data;
unsigned long page_offset;
unsigned long pfn;
@@ -157,7 +151,13 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
#if !defined(NV_LINUX)
return vmf_insert_pfn(vma, address, pfn);
#elif defined(NV_VMF_INSERT_MIXED_PRESENT)
#if defined(NV_LINUX_PFN_T_H_PRESENT)
return vmf_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
#else
return vmf_insert_mixed(vma, address, pfn);
#endif
#else
return __nv_vm_insert_mixed_helper(vma, address, pfn);
#endif

View File

@@ -52,12 +52,11 @@ int nv_drm_gem_import_userspace_memory_ioctl(struct drm_device *dev,
static inline
struct nv_drm_gem_user_memory *nv_drm_gem_object_user_memory_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
{
struct nv_drm_gem_object *nv_gem =
nv_drm_gem_object_lookup(dev, filp, handle);
nv_drm_gem_object_lookup(filp, handle);
if (nv_gem != NULL && nv_gem->ops != &__nv_gem_user_memory_ops) {
nv_drm_gem_object_unreference_unlocked(nv_gem);

View File

@@ -35,17 +35,10 @@
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-gem-nvkms-memory.h"
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
#include <drm/drm_file.h>
#endif
#include <drm/drm_vma_manager.h>
#include "linux/dma-buf.h"
@@ -58,7 +51,7 @@ void nv_drm_gem_free(struct drm_gem_object *gem)
/* Cleanup core gem object */
drm_gem_object_release(&nv_gem->base);
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
#if !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_dma_resv_fini(&nv_gem->resv);
#endif
@@ -135,7 +128,7 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
/* Initialize the gem object */
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
#if !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_dma_resv_init(&nv_gem->resv);
#endif
@@ -155,7 +148,6 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{
#if defined(NV_DMA_BUF_OWNER_PRESENT)
struct drm_gem_object *gem_dst;
struct nv_drm_gem_object *nv_gem_src;
@@ -179,7 +171,6 @@ struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
return gem_dst;
}
}
#endif /* NV_DMA_BUF_OWNER_PRESENT */
return drm_gem_prime_import(dev, dma_buf);
}
@@ -231,8 +222,7 @@ int nv_drm_gem_map_offset_ioctl(struct drm_device *dev,
struct nv_drm_gem_object *nv_gem;
int ret;
if ((nv_gem = nv_drm_gem_object_lookup(dev,
filep,
if ((nv_gem = nv_drm_gem_object_lookup(filep,
params->handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
@@ -257,7 +247,6 @@ int nv_drm_gem_map_offset_ioctl(struct drm_device *dev,
return ret;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
{
struct drm_file *priv = file->private_data;
@@ -268,7 +257,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
struct nv_drm_gem_object *nv_gem;
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
node = nv_drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff, vma_pages(vma));
if (likely(node)) {
obj = container_of(node, struct drm_gem_object, vma_node);
@@ -295,7 +284,7 @@ int nv_drm_mmap(struct file *file, struct vm_area_struct *vma)
goto done;
}
if (!nv_drm_vma_node_is_allowed(node, file)) {
if (!drm_vma_node_is_allowed(node, file->private_data)) {
ret = -EACCES;
goto done;
}
@@ -317,7 +306,6 @@ done:
return ret;
}
#endif
int nv_drm_gem_identify_object_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
@@ -332,23 +320,21 @@ int nv_drm_gem_identify_object_ioctl(struct drm_device *dev,
return -EOPNOTSUPP;
}
nv_dma_buf = nv_drm_gem_object_dma_buf_lookup(dev, filep, p->handle);
nv_dma_buf = nv_drm_gem_object_dma_buf_lookup(filep, p->handle);
if (nv_dma_buf) {
p->object_type = NV_GEM_OBJECT_DMABUF;
nv_gem = &nv_dma_buf->base;
goto done;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
nv_nvkms_memory = nv_drm_gem_object_nvkms_memory_lookup(dev, filep, p->handle);
nv_nvkms_memory = nv_drm_gem_object_nvkms_memory_lookup(filep, p->handle);
if (nv_nvkms_memory) {
p->object_type = NV_GEM_OBJECT_NVKMS;
nv_gem = &nv_nvkms_memory->base;
goto done;
}
#endif
nv_user_memory = nv_drm_gem_object_user_memory_lookup(dev, filep, p->handle);
nv_user_memory = nv_drm_gem_object_user_memory_lookup(filep, p->handle);
if (nv_user_memory) {
p->object_type = NV_GEM_OBJECT_USERMEMORY;
nv_gem = &nv_user_memory->base;

View File

@@ -33,17 +33,12 @@
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
#include "nvkms-kapi.h"
#include "nv-mm.h"
#if defined(NV_DRM_FENCE_AVAILABLE)
#include "nvidia-dma-fence-helper.h"
#include "nvidia-dma-resv-helper.h"
#endif
#include "linux/dma-buf.h"
@@ -73,9 +68,7 @@ struct nv_drm_gem_object {
struct NvKmsKapiMemory *pMemory;
bool is_drm_dumb;
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
#if !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_dma_resv_t resv;
#endif
};
@@ -90,47 +83,14 @@ static inline struct nv_drm_gem_object *to_nv_gem_object(
return NULL;
}
/*
* drm_gem_object_{get/put}() added by commit
* e6b62714e87c8811d5564b6a0738dcde63a51774 (2017-02-28) and
* drm_gem_object_{reference/unreference}() removed by commit
* 3e70fd160cf0b1945225eaa08dd2cb8544f21cb8 (2018-11-15).
*/
static inline void
nv_drm_gem_object_reference(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_GET_PRESENT)
drm_gem_object_get(&nv_gem->base);
#else
drm_gem_object_reference(&nv_gem->base);
#endif
}
static inline void
nv_drm_gem_object_unreference_unlocked(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_GET_PRESENT)
#if defined(NV_DRM_GEM_OBJECT_PUT_UNLOCK_PRESENT)
drm_gem_object_put_unlocked(&nv_gem->base);
#else
drm_gem_object_put(&nv_gem->base);
#endif
#else
drm_gem_object_unreference_unlocked(&nv_gem->base);
#endif
}
static inline void
nv_drm_gem_object_unreference(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_GET_PRESENT)
drm_gem_object_put(&nv_gem->base);
#else
drm_gem_object_unreference(&nv_gem->base);
#endif
}
static inline int nv_drm_gem_handle_create_drop_reference(
@@ -171,17 +131,10 @@ done:
void nv_drm_gem_free(struct drm_gem_object *gem);
static inline struct nv_drm_gem_object *nv_drm_gem_object_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
{
#if (NV_DRM_GEM_OBJECT_LOOKUP_ARGUMENT_COUNT == 3)
return to_nv_gem_object(drm_gem_object_lookup(dev, filp, handle));
#elif (NV_DRM_GEM_OBJECT_LOOKUP_ARGUMENT_COUNT == 2)
return to_nv_gem_object(drm_gem_object_lookup(filp, handle));
#else
#error "Unknown argument count of drm_gem_object_lookup()"
#endif
}
static inline int nv_drm_gem_handle_create(struct drm_file *filp,
@@ -191,7 +144,6 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
return drm_gem_handle_create(filp, &nv_gem->base, handle);
}
#if defined(NV_DRM_FENCE_AVAILABLE)
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
@@ -200,7 +152,6 @@ static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
#endif
}
#endif
void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
struct nv_drm_gem_object *nv_gem,

View File

@@ -33,7 +33,7 @@
#include "nvmisc.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
@@ -43,27 +43,7 @@
#include <drm/drm_atomic_uapi.h>
#endif
/*
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
* 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") in v6.0.
*
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it
* is safe to unconditionally include it when drm_framebuffer_get() is present.
*/
#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT)
#include <drm/drm_framebuffer.h>
#endif
static void __nv_drm_framebuffer_put(struct drm_framebuffer *fb)
{
#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT)
drm_framebuffer_put(fb);
#else
drm_framebuffer_unreference(fb);
#endif
}
/*
* drm_atomic_helper_disable_all() has been added by commit
@@ -149,7 +129,6 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
goto free;
}
#if defined(NV_DRM_ROTATION_AVAILABLE)
nv_drm_for_each_plane(plane, dev) {
plane_state = drm_atomic_get_plane_state(state, plane);
if (IS_ERR(plane_state)) {
@@ -159,7 +138,6 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
plane_state->rotation = DRM_MODE_ROTATE_0;
}
#endif
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
@@ -189,29 +167,15 @@ free:
WARN_ON(plane->state->crtc);
if (plane->old_fb)
__nv_drm_framebuffer_put(plane->old_fb);
drm_framebuffer_put(plane->old_fb);
}
plane->old_fb = NULL;
}
}
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
drm_atomic_state_put(state);
#else
if (ret != 0) {
drm_atomic_state_free(state);
} else {
/*
* In case of success, drm_atomic_commit() takes care to cleanup and
* free @state.
*
* Comment placed above drm_atomic_commit() says: The caller must not
* free or in any other way access @state. If the function fails then
* the caller must clean up @state itself.
*/
}
#endif
return ret;
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */

View File

@@ -31,43 +31,17 @@
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
/* For DRM_ROTATE_* , DRM_REFLECT_* */
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
#include <drm/drm_blend.h>
#endif
#if defined(NV_DRM_ROTATION_AVAILABLE) || \
defined(NV_DRM_COLOR_CTM_3X4_PRESENT) || \
defined(NV_DRM_COLOR_LUT_PRESENT)
/*
* For DRM_MODE_ROTATE_*, DRM_MODE_REFLECT_*, struct drm_color_ctm_3x4, and
* struct drm_color_lut.
*/
#include <uapi/drm/drm_mode.h>
#endif
#if defined(NV_DRM_ROTATION_AVAILABLE)
/*
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
* DRM_ROTATE_* and DRM_REFLECT_*
*/
#if !defined(DRM_MODE_ROTATE_0)
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
#endif
#endif //NV_DRM_ROTATION_AVAILABLE
/*
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
@@ -76,29 +50,7 @@
*/
#define NV_DRM_USE_EXTENDED_PROPERTIES (DRM_OBJECT_MAX_PROPERTY >= 64)
/*
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
* (2017-09-26) and drm_dev_unref() is removed by
* ba1d345401476a5f7fbad622607c5a1f95e59b31 (2018-11-15).
*
* drm_dev_unref() has been added and drm_dev_free() removed by commit -
*
* 2014-01-29: 099d1c290e2ebc3b798961a6c177c3aef5f0b789
*/
static inline void nv_drm_dev_free(struct drm_device *dev)
{
#if defined(NV_DRM_DEV_PUT_PRESENT)
drm_dev_put(dev);
#elif defined(NV_DRM_DEV_UNREF_PRESENT)
drm_dev_unref(dev);
#else
drm_dev_free(dev);
#endif
}
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
static inline struct sg_table*
nv_drm_prime_pages_to_sg(struct drm_device *dev,
@@ -111,8 +63,6 @@ nv_drm_prime_pages_to_sg(struct drm_device *dev,
#endif
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
/*
* drm_for_each_connector(), drm_for_each_crtc(), drm_for_each_fb(),
* drm_for_each_encoder and drm_for_each_plane() were added by kernel
@@ -166,18 +116,6 @@ nv_drm_prime_pages_to_sg(struct drm_device *dev,
list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head)
#endif
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
#define nv_drm_for_each_connector(connector, conn_iter, dev) \
drm_for_each_connector_iter(connector, conn_iter)
#elif defined(drm_for_each_connector)
#define nv_drm_for_each_connector(connector, conn_iter, dev) \
drm_for_each_connector(connector, dev)
#else
#define nv_drm_for_each_connector(connector, conn_iter, dev) \
WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); \
list_for_each_entry(connector, &(dev)->mode_config.connector_list, head)
#endif
#if defined(drm_for_each_encoder)
#define nv_drm_for_each_encoder(encoder, dev) \
drm_for_each_encoder(encoder, dev)
@@ -348,72 +286,8 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
for_each_new_plane_in_state(__state, plane, new_plane_state, __i)
#endif
static inline struct drm_connector *
nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
uint32_t id)
{
#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
return drm_connector_find(dev, id);
#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
return drm_connector_lookup(dev, filep, id);
#else
return drm_connector_lookup(dev, id);
#endif
}
static inline void nv_drm_connector_put(struct drm_connector *connector)
{
#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
drm_connector_put(connector);
#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
drm_connector_unreference(connector);
#endif
}
static inline void nv_drm_property_blob_put(struct drm_property_blob *blob)
{
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
drm_property_blob_put(blob);
#else
drm_property_unreference_blob(blob);
#endif
}
static inline void nv_drm_property_blob_get(struct drm_property_blob *blob)
{
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
drm_property_blob_get(blob);
#else
drm_property_reference_blob(blob);
#endif
}
static inline struct drm_crtc *
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
{
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
return drm_crtc_find(dev, filep, id);
#else
return drm_crtc_find(dev, id);
#endif
}
static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
uint32_t id)
{
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
return drm_encoder_find(dev, NULL /* file_priv */, id);
#else
return drm_encoder_find(dev, id);
#endif
}
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
#include <drm/drm_auth.h>
#endif
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
#include <drm/drm_file.h>
#endif
/*
* drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
@@ -438,10 +312,6 @@ static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
* Ville Syrjälä <ville.syrjala@linux.intel.com>
*
* drm_connector_for_each_possible_encoder() is copied from
* include/drm/drm_connector.h and modified to use nv_drm_encoder_find()
* instead of drm_encoder_find().
*
* drm_connector_for_each_possible_encoder() is copied from
* include/drm/drm_connector.h @
* 83aefbb887b59df0b3520965c3701e01deacfc52
* which has the following copyright and license information:
@@ -467,9 +337,7 @@ static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
* OF THIS SOFTWARE.
*/
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
/**
* nv_drm_connector_for_each_possible_encoder - iterate connector's possible
@@ -488,9 +356,10 @@ static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
for ((__i) = 0; (__i) < ARRAY_SIZE((connector)->encoder_ids) && \
(connector)->encoder_ids[(__i)] != 0; (__i)++) \
for_each_if((encoder) = \
nv_drm_encoder_find((connector)->dev, \
drm_encoder_find((connector)->dev, NULL, \
(connector)->encoder_ids[(__i)]))
#define nv_drm_connector_for_each_possible_encoder(connector, encoder) \
{ \
unsigned int __i; \
@@ -544,80 +413,14 @@ nv_drm_connector_update_edid_property(struct drm_connector *connector,
#endif
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
#include <drm/drm_connector.h>
static inline
void nv_drm_connector_list_iter_begin(struct drm_device *dev,
struct drm_connector_list_iter *iter)
{
#if defined(NV_DRM_CONNECTOR_LIST_ITER_BEGIN_PRESENT)
drm_connector_list_iter_begin(dev, iter);
#else
drm_connector_list_iter_get(dev, iter);
#endif
}
static inline
void nv_drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
{
#if defined(NV_DRM_CONNECTOR_LIST_ITER_BEGIN_PRESENT)
drm_connector_list_iter_end(iter);
#else
drm_connector_list_iter_put(iter);
#endif
}
#endif
/*
* The drm_format_num_planes() function was added by commit d0d110e09629 drm:
* Add drm_format_num_planes() utility function in v3.3 (2011-12-20). Prototype
* was moved from drm_crtc.h to drm_fourcc.h by commit ae4df11a0f53 (drm: Move
* format-related helpers to drm_fourcc.c) in v4.8 (2016-06-09).
* drm_format_num_planes() has been removed by commit 05c452c115bf (drm: Remove
* users of drm_format_num_planes) in v5.3 (2019-05-16).
*
* drm_format_info() is available only from v4.10 (2016-10-18), added by commit
* 84770cc24f3a (drm: Centralize format information).
*/
#include <drm/drm_crtc.h>
#include <drm/drm_fourcc.h>
static inline int nv_drm_format_num_planes(uint32_t format)
{
#if defined(NV_DRM_FORMAT_NUM_PLANES_PRESENT)
return drm_format_num_planes(format);
#else
const struct drm_format_info *info = drm_format_info(format);
return info != NULL ? info->num_planes : 1;
#endif
}
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
/*
* DRM_FORMAT_MOD_LINEAR was also defined after the original modifier support
* was added to the kernel, as a more explicit alias of DRM_FORMAT_MOD_NONE
*/
#if !defined(DRM_FORMAT_MOD_VENDOR_NONE)
#define DRM_FORMAT_MOD_VENDOR_NONE 0
#endif
#if !defined(DRM_FORMAT_MOD_LINEAR)
#define DRM_FORMAT_MOD_LINEAR fourcc_mod_code(NONE, 0)
#endif
/*
* DRM_FORMAT_MOD_INVALID was defined after the original modifier support was
* added to the kernel, for use as a sentinel value.
*/
#if !defined(DRM_FORMAT_RESERVED)
#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1)
#endif
#if !defined(DRM_FORMAT_MOD_INVALID)
#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED)
#endif
/*
* DRM_FORMAT_MOD_VENDOR_NVIDIA was previously called
* DRM_FORMAT_MOD_VENDOR_NV.
@@ -640,8 +443,6 @@ static inline int nv_drm_format_num_planes(uint32_t format)
(((c) & 0x7) << 23)))
#endif
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
/*
* DRM_UNLOCKED was removed with commit 2798ffcc1d6a ("drm: Remove locking for
* legacy ioctls and DRM_UNLOCKED") in v6.8, but it was previously made
@@ -666,92 +467,6 @@ struct drm_color_ctm_3x4 {
};
#endif
/*
* struct drm_color_lut was added by commit 5488dc16fde7 ("drm: introduce pipe
* color correction properties") in v4.6. For backwards compatibility, define it
* when not present.
*/
#if !defined(NV_DRM_COLOR_LUT_PRESENT)
struct drm_color_lut {
__u16 red;
__u16 green;
__u16 blue;
__u16 reserved;
};
#endif
/*
* drm_vma_offset_exact_lookup_locked() was added
* by kernel commit 2225cfe46bcc which was Signed-off-by:
* Daniel Vetter <daniel.vetter@intel.com>
*
* drm_vma_offset_exact_lookup_locked() was copied from
* include/drm/drm_vma_manager.h @ 2225cfe46bcc
* which has the following copyright and license information:
*
* Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <drm/drm_vma_manager.h>
/**
* nv_drm_vma_offset_exact_lookup_locked() - Look up node by exact address
* @mgr: Manager object
* @start: Start address (page-based, not byte-based)
* @pages: Size of object (page-based)
*
* Same as drm_vma_offset_lookup_locked() but does not allow any offset into the node.
* It only returns the exact object with the given start address.
*
* RETURNS:
* Node at exact start address @start.
*/
static inline struct drm_vma_offset_node *
nv_drm_vma_offset_exact_lookup_locked(struct drm_vma_offset_manager *mgr,
unsigned long start,
unsigned long pages)
{
#if defined(NV_DRM_VMA_OFFSET_EXACT_LOOKUP_LOCKED_PRESENT)
return drm_vma_offset_exact_lookup_locked(mgr, start, pages);
#else
struct drm_vma_offset_node *node;
node = drm_vma_offset_lookup_locked(mgr, start, pages);
return (node && node->vm_node.start == start) ? node : NULL;
#endif
}
static inline bool
nv_drm_vma_node_is_allowed(struct drm_vma_offset_node *node,
struct file *filp)
{
#if defined(NV_DRM_VMA_NODE_IS_ALLOWED_HAS_TAG_ARG)
return drm_vma_node_is_allowed(node, filp->private_data);
#else
return drm_vma_node_is_allowed(node, filp);
#endif
}
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
#endif /* defined(NV_DRM_AVAILABLE) */
#endif /* __NVIDIA_DRM_HELPER_H__ */

View File

@@ -20,9 +20,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-modeset.h"
@@ -34,10 +34,7 @@
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_VBLANK_H_PRESENT)
#include <drm/drm_vblank.h>
#endif
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc.h>
@@ -48,9 +45,7 @@
#include <linux/host1x-next.h>
#endif
#if defined(NV_DRM_FENCE_AVAILABLE)
#include "nvidia-dma-fence-helper.h"
#endif
#include <linux/dma-fence.h>
struct nv_drm_atomic_state {
struct NvKmsKapiRequestedModeSetConfig config;
@@ -156,24 +151,23 @@ static int __nv_drm_put_back_post_fence_fd(
return ret;
}
#if defined(NV_DRM_FENCE_AVAILABLE)
struct nv_drm_plane_fence_cb_data {
nv_dma_fence_cb_t dma_fence_cb;
struct dma_fence_cb dma_fence_cb;
struct nv_drm_device *nv_dev;
NvU32 semaphore_index;
};
static void
__nv_drm_plane_fence_cb(
nv_dma_fence_t *fence,
nv_dma_fence_cb_t *cb_data
struct dma_fence *fence,
struct dma_fence_cb *cb_data
)
{
struct nv_drm_plane_fence_cb_data *fence_data =
container_of(cb_data, typeof(*fence_data), dma_fence_cb);
struct nv_drm_device *nv_dev = fence_data->nv_dev;
nv_dma_fence_put(fence);
dma_fence_put(fence);
nvKms->signalDisplaySemaphore(nv_dev->pDevice, fence_data->semaphore_index);
nv_drm_free(fence_data);
}
@@ -279,7 +273,7 @@ static int __nv_drm_convert_in_fences(
fence_data->nv_dev = nv_dev;
fence_data->semaphore_index = semaphore_index;
ret = nv_dma_fence_add_callback(plane_state->fence,
ret = dma_fence_add_callback(plane_state->fence,
&fence_data->dma_fence_cb,
__nv_drm_plane_fence_cb);
@@ -313,7 +307,6 @@ static int __nv_drm_convert_in_fences(
return 0;
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
static int __nv_drm_get_syncpt_data(
struct nv_drm_device *nv_dev,
@@ -414,7 +407,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
return -EINVAL;
}
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
if (commit) {
/*
* This function does what is necessary to prepare the framebuffers
@@ -426,10 +418,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
* in the new state, preferring explicit sync fences when appropriate.
* This must be done prior to converting the per-plane fences to
* semaphore waits below.
*
* Note this only works when the drm_framebuffer:obj[] field is present
* and populated, so skip calling this function on kernels where that
* field is not present.
*/
ret = drm_atomic_helper_prepare_planes(dev, state);
@@ -437,7 +425,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
return ret;
}
}
#endif /* defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT) */
memset(requested_config, 0, sizeof(*requested_config));
@@ -472,7 +459,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
nv_new_crtc_state->nv_flip = NULL;
}
#if defined(NV_DRM_FENCE_AVAILABLE)
ret = __nv_drm_convert_in_fences(nv_dev,
state,
crtc,
@@ -481,7 +467,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
if (ret != 0) {
return ret;
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
}
/*
@@ -534,7 +519,6 @@ int nv_drm_atomic_check(struct drm_device *dev,
{
int ret = 0;
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
int i;
@@ -550,7 +534,6 @@ int nv_drm_atomic_check(struct drm_device *dev,
}
}
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
if ((ret = drm_atomic_helper_check(dev, state)) != 0) {
goto done;
@@ -678,7 +661,6 @@ int nv_drm_atomic_commit(struct drm_device *dev,
}
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
/*
* If the legacy LUT needs to be updated, ensure that the previous LUT
* update is complete first.
@@ -703,11 +685,8 @@ int nv_drm_atomic_commit(struct drm_device *dev,
}
}
}
#endif
}
#if defined(NV_DRM_ATOMIC_HELPER_SWAP_STATE_HAS_STALL_ARG)
/*
* nv_drm_atomic_commit_internal()
* implements blocking/non-blocking atomic commit using
@@ -718,18 +697,10 @@ int nv_drm_atomic_commit(struct drm_device *dev,
* expected.
*/
#if defined(NV_DRM_ATOMIC_HELPER_SWAP_STATE_RETURN_INT)
ret = drm_atomic_helper_swap_state(state, false /* stall */);
if (WARN_ON(ret != 0)) {
return ret;
}
#else
drm_atomic_helper_swap_state(state, false /* stall */);
#endif
#else
drm_atomic_helper_swap_state(dev, state);
#endif
/*
* nv_drm_atomic_commit_internal() must not return failure after
@@ -831,7 +802,6 @@ int nv_drm_atomic_commit(struct drm_device *dev,
}
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
@@ -842,20 +812,14 @@ int nv_drm_atomic_commit(struct drm_device *dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
#endif
}
}
done:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
/*
* If ref counting is present, state will be freed when the caller
* drops its reference after we return.
* State will be freed when the caller drops its reference after we return.
*/
#else
drm_atomic_state_free(state);
#endif
return 0;
}

View File

@@ -25,7 +25,7 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvkms-kapi.h"
@@ -48,6 +48,6 @@ void nv_drm_handle_flip_occurred(struct nv_drm_device *nv_dev,
int nv_drm_shut_down_all_crtcs(struct drm_device *dev);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_MODESET_H__ */

View File

@@ -26,11 +26,8 @@
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
#include <linux/file.h>
#include <linux/sync_file.h>
#endif
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/device.h>
@@ -66,18 +63,6 @@ void nv_drm_free(void *ptr)
kfree(ptr);
}
char *nv_drm_asprintf(const char *fmt, ...)
{
va_list ap;
char *p;
va_start(ap, fmt);
p = kvasprintf(GFP_KERNEL, fmt, ap);
va_end(ap);
return p;
}
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#define WRITE_COMBINE_FLUSH() asm volatile("sfence":::"memory")
#elif defined(NVCPU_PPC64LE)
@@ -236,10 +221,8 @@ unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
return jiffies + msecs_to_jiffies(relative_timeout_ms);
}
#if defined(NV_DRM_FENCE_AVAILABLE)
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
int nv_drm_create_sync_file(struct dma_fence *fence)
{
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
struct sync_file *sync;
int fd = get_unused_fd_flags(O_CLOEXEC);
@@ -258,20 +241,12 @@ int nv_drm_create_sync_file(nv_dma_fence_t *fence)
fd_install(fd, sync->file);
return fd;
#else /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
return -EINVAL;
#endif /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
}
nv_dma_fence_t *nv_drm_sync_file_get_fence(int fd)
struct dma_fence *nv_drm_sync_file_get_fence(int fd)
{
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
return sync_file_get_fence(fd);
#else /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
return NULL;
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
void nv_drm_yield(void)
{

View File

@@ -29,9 +29,7 @@
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_FENCE_AVAILABLE)
#include "nvidia-dma-fence-helper.h"
#endif
#include "linux/dma-fence.h"
#if defined(NV_LINUX) || defined(NV_BSD)
#include "nv-kthread-q.h"
@@ -71,8 +69,6 @@ void *nv_drm_calloc(size_t nmemb, size_t size);
void nv_drm_free(void *ptr);
char *nv_drm_asprintf(const char *fmt, ...);
void nv_drm_write_combine_flush(void);
int nv_drm_lock_user_pages(unsigned long address,
@@ -105,11 +101,9 @@ unsigned long nv_drm_timer_now(void);
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms);
#if defined(NV_DRM_FENCE_AVAILABLE)
int nv_drm_create_sync_file(nv_dma_fence_t *fence);
int nv_drm_create_sync_file(struct dma_fence *fence);
nv_dma_fence_t *nv_drm_sync_file_get_fence(int fd);
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
struct dma_fence *nv_drm_sync_file_get_fence(int fd);
void nv_drm_yield(void);

View File

@@ -31,13 +31,8 @@
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_DEVICE_H_PRESENT)
#include <drm/drm_device.h>
#endif
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
#include "nvidia-drm-os-interface.h"
@@ -99,7 +94,6 @@ struct nv_drm_device {
struct NvKmsKapiDevice *pDevice;
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
/*
* Lock to protect drm-subsystem and fields of this structure
* from concurrent access.
@@ -129,9 +123,7 @@ struct nv_drm_device {
NvU8 genericPageKind;
NvU8 pageKindGeneration;
NvU8 sectorLayout;
#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
NvU64 modifiers[6 /* block linear */ + 1 /* linear */ + 1 /* terminator */];
#endif
struct delayed_work hotplug_event_work;
atomic_t enable_event_handling;
@@ -144,12 +136,8 @@ struct nv_drm_device {
*/
wait_queue_head_t flip_event_wq;
#endif
#if defined(NV_DRM_FENCE_AVAILABLE)
NvU64 semsurf_stride;
NvU64 semsurf_max_submitted_offset;
#endif
NvBool hasVideoMemory;

View File

@@ -30,8 +30,6 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-os-interface.c
# Register the conftests needed by nvidia-drm.ko
#
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
@@ -40,33 +38,18 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_hand
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl___vma_start_write
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_put
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_format_num_planes
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_for_each_possible_encoder
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_rotation_available
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_vma_offset_exact_lookup_locked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += nvhost_dma_fence_unpack
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
@@ -75,43 +58,20 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_irq
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_name
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_device_list
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_set_busid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_connectors_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_init_function_args
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_helper_mode_fill_fb_struct
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_drop_has_from_release_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_unload_has_int_return_type
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_crtc_destroy_state_has_crtc_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_plane_destroy_state_has_plane_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_object_find_has_file_priv_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_buf_owner
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_list_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_swap_state_has_stall_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_prime_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_offset_node_has_readonly
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_display_mode_has_vrefresh
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_master_set_has_int_return_type
@@ -129,11 +89,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
@@ -143,11 +100,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffe
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_info_has_is_yuv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date

View File

@@ -20,18 +20,15 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h" /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#include "nvidia-drm-conftest.h" /* NV_DRM_AVAILABLE */
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_PLANE_H_PRESENT)
#include <drm/drm_plane.h>
#endif
#include <drm/drm_modes.h>
#include <uapi/drm/drm_fourcc.h>

View File

@@ -25,7 +25,7 @@
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_AVAILABLE)
#include "nvkms-kapi.h"
@@ -49,6 +49,6 @@ void nvkms_display_mode_to_drm_mode(
void drm_mode_to_nvkms_display_mode(const struct drm_display_mode *src,
struct NvKmsKapiDisplayMode *dst);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */
#endif /* __NVIDIA_DRM_UTILS_H__ */

View File

@@ -29,12 +29,7 @@
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/mm.h>
#if defined(NV_LINUX_BUG_H_PRESENT)
#include <linux/bug.h>
#else
#include <asm/bug.h>
#endif
// Today's implementation is a little simpler and more limited than the
// API description allows for in nv-kthread-q.h. Details include:

View File

@@ -209,6 +209,225 @@ NvBool nvkms_kernel_supports_syncpts(void)
/*************************************************************************
* NVKMS interface for nvhost unit for sync point APIs.
*************************************************************************/
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
#undef NVKMS_SYNCPT_STUBS_NEEDED
#include <linux/nvhost.h>
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct platform_device *pdev = nvhost_get_default_device();
switch (op) {
case NVKMS_SYNCPT_OP_ALLOC:
params->alloc.id = nvhost_get_syncpt_client_managed(
pdev, params->alloc.syncpt_name);
break;
case NVKMS_SYNCPT_OP_PUT:
nvhost_syncpt_put_ref_ext(pdev, params->put.id);
break;
case NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH: {
struct nvhost_fence *fence;
NvU32 id, thresh;
fence = nvhost_fence_get(params->fd_to_id_and_thresh.fd);
if (fence == NULL) {
return NV_FALSE;
}
if (nvhost_fence_num_pts(fence) > 1) {
/*! Syncpoint fence fd contains more than one syncpoint */
nvhost_fence_put(fence);
return NV_FALSE;
}
if (nvhost_fence_get_pt(fence, 0, &id, &thresh) != 0) {
nvhost_fence_put(fence);
return NV_FALSE;
}
params->fd_to_id_and_thresh.id = id;
params->fd_to_id_and_thresh.thresh = thresh;
nvhost_fence_put(fence);
break;
}
case NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD:
nvhost_syncpt_create_fence_single_ext(
pdev,
params->id_and_thresh_to_fd.id,
params->id_and_thresh_to_fd.thresh,
"nvkms-fence",
&params->id_and_thresh_to_fd.fd);
break;
case NVKMS_SYNCPT_OP_READ_MINVAL:
params->read_minval.minval =
nvhost_syncpt_read_minval(pdev, params->read_minval.id);
break;
}
return NV_TRUE;
}
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) && defined(NV_LINUX_NVHOST_H_PRESENT)
#include <linux/dma-fence.h>
#include <linux/file.h>
#include <linux/host1x-next.h>
#include <linux/sync_file.h>
/*
* If the host1x.h header is present, then we are using the upstream
* host1x driver and so make sure CONFIG_TEGRA_HOST1X is defined to pick
* up the correct prototypes/definitions in nvhost.h.
*/
#define CONFIG_TEGRA_HOST1X
#include <linux/nvhost.h>
#undef NVKMS_SYNCPT_STUBS_NEEDED
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct host1x_syncpt *host1x_sp;
struct platform_device *pdev;
struct host1x *host1x;
pdev = nvhost_get_default_device();
if (pdev == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get nvhost default pdev");
return NV_FALSE;
}
host1x = nvhost_get_host1x(pdev);
if (host1x == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get host1x");
return NV_FALSE;
}
switch (op) {
case NVKMS_SYNCPT_OP_ALLOC:
host1x_sp = host1x_syncpt_alloc(host1x,
HOST1X_SYNCPT_CLIENT_MANAGED,
params->alloc.syncpt_name);
if (host1x_sp == NULL) {
return NV_FALSE;
}
params->alloc.id = host1x_syncpt_id(host1x_sp);
break;
case NVKMS_SYNCPT_OP_PUT:
host1x_sp = host1x_syncpt_get_by_id_noref(host1x, params->put.id);
if (host1x_sp == NULL) {
return NV_FALSE;
}
host1x_syncpt_put(host1x_sp);
break;
case NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH: {
struct dma_fence *f;
NvU32 id, thresh;
int err;
f = sync_file_get_fence(params->fd_to_id_and_thresh.fd);
if (f == NULL) {
return NV_FALSE;
}
if (dma_fence_is_array(f)) {
struct dma_fence_array *array = to_dma_fence_array(f);
if (array->num_fences > 1) {
/* Syncpoint fence fd contains more than one syncpoint */
dma_fence_put(f);
return NV_FALSE;
}
f = array->fences[0];
}
err = host1x_fence_extract(f, &id, &thresh);
dma_fence_put(f);
if (err < 0) {
return NV_FALSE;
}
params->fd_to_id_and_thresh.id = id;
params->fd_to_id_and_thresh.thresh = thresh;
break;
}
case NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD: {
struct sync_file *file;
struct dma_fence *f;
int fd;
host1x_sp = host1x_syncpt_get_by_id_noref(host1x,
params->id_and_thresh_to_fd.id);
if (host1x_sp == NULL) {
return NV_FALSE;
}
f = host1x_fence_create(host1x_sp,
params->id_and_thresh_to_fd.thresh, true);
if (IS_ERR(f)) {
return NV_FALSE;
}
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
dma_fence_put(f);
return NV_FALSE;
}
file = sync_file_create(f);
dma_fence_put(f);
if (!file) {
return NV_FALSE;
}
fd_install(fd, file->file);
params->id_and_thresh_to_fd.fd = fd;
break;
}
case NVKMS_SYNCPT_OP_READ_MINVAL:
host1x_sp = host1x_syncpt_get_by_id_noref(host1x, params->read_minval.id);
if (host1x_sp == NULL) {
return NV_FALSE;
}
params->read_minval.minval = host1x_syncpt_read(host1x_sp);
break;
}
return NV_TRUE;
}
#endif
#ifdef NVKMS_SYNCPT_STUBS_NEEDED
/* Unsupported STUB for nvkms_syncpt APIs */
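The handlers above give the full calling convention for nvkms_syncpt_op(). A hedged caller sketch, using only the op and parameter field names visible in this diff; the helper name and its arguments are hypothetical and not part of this commit:

    static NvBool example_fd_to_syncpt(int fd, NvU32 *id, NvU32 *thresh)
    {
        NvKmsSyncPtOpParams params = { };

        params.fd_to_id_and_thresh.fd = fd;

        /* Fails if the fd does not resolve to exactly one syncpoint. */
        if (!nvkms_syncpt_op(NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH, &params)) {
            return NV_FALSE;
        }

        /* The handlers above fill these out on success. */
        *id = params.fd_to_id_and_thresh.id;
        *thresh = params.fd_to_id_and_thresh.thresh;
        return NV_TRUE;
    }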
@@ -819,12 +1038,6 @@ inline static void nvkms_timer_callback_typed_data(struct timer_list *timer)
_nvkms_timer_callback_internal(nvkms_timer);
}
inline static void nvkms_timer_callback_anon_data(unsigned long arg)
{
struct nvkms_timer_t *nvkms_timer = (struct nvkms_timer_t *) arg;
_nvkms_timer_callback_internal(nvkms_timer);
}
static void
nvkms_init_timer(struct nvkms_timer_t *timer, nvkms_timer_proc_t *proc,
void *dataPtr, NvU32 dataU32, NvBool isRefPtr, NvU64 usec)
@@ -857,13 +1070,7 @@ nvkms_init_timer(struct nvkms_timer_t *timer, nvkms_timer_proc_t *proc,
timer->kernel_timer_created = NV_FALSE;
nvkms_queue_work(&nvkms_kthread_q, &timer->nv_kthread_q_item);
} else {
#if defined(NV_TIMER_SETUP_PRESENT)
timer_setup(&timer->kernel_timer, nvkms_timer_callback_typed_data, 0);
#else
init_timer(&timer->kernel_timer);
timer->kernel_timer.function = nvkms_timer_callback_anon_data;
timer->kernel_timer.data = (unsigned long) timer;
#endif
timer->kernel_timer_created = NV_TRUE;
mod_timer(&timer->kernel_timer, jiffies + NVKMS_USECS_TO_JIFFIES(usec));
@@ -1142,6 +1349,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
struct NvKmsKapiDevice *device,
NvBool interruptible,
int *status)
{
struct nvkms_per_open *popen = NULL;
@@ -1155,11 +1363,14 @@ static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
popen->type = type;
if (interruptible) {
*status = down_interruptible(&nvkms_lock);
if (*status != 0) {
goto failed;
}
} else {
down(&nvkms_lock);
}
popen->data = nvKmsOpen(current->tgid, type, popen);
@@ -1258,16 +1469,20 @@ static void nvkms_close_popen(struct nvkms_per_open *popen)
static int nvkms_ioctl_common
(
struct nvkms_per_open *popen,
NvU32 cmd, NvU64 address, const size_t size
NvU32 cmd, NvU64 address, const size_t size,
NvBool interruptible
)
{
int status;
NvBool ret;
status = down_interruptible(&nvkms_lock);
if (interruptible) {
int status = down_interruptible(&nvkms_lock);
if (status != 0) {
return status;
}
} else {
down(&nvkms_lock);
}
if (popen->data != NULL) {
ret = nvKmsIoctl(popen->data, cmd, address, size);
@@ -1293,7 +1508,10 @@ struct nvkms_per_open* nvkms_open_from_kapi
struct nvkms_per_open *ret;
nvkms_read_lock_pm_lock();
ret = nvkms_open_common(NVKMS_CLIENT_KERNEL_SPACE, device, &status);
ret = nvkms_open_common(NVKMS_CLIENT_KERNEL_SPACE,
device,
NV_FALSE /* interruptible */,
&status);
nvkms_read_unlock_pm_lock();
return ret;
@@ -1312,13 +1530,15 @@ NvBool nvkms_ioctl_from_kapi_try_pmlock
{
NvBool ret;
// XXX PM lock must be allowed to fail, see bug 4432810.
if (nvkms_read_trylock_pm_lock()) {
return NV_FALSE;
}
ret = nvkms_ioctl_common(popen,
cmd,
(NvU64)(NvUPtr)params_address, param_size) == 0;
(NvU64)(NvUPtr)params_address, param_size,
NV_FALSE /* interruptible */) == 0;
nvkms_read_unlock_pm_lock();
return ret;
@@ -1335,7 +1555,8 @@ NvBool nvkms_ioctl_from_kapi
nvkms_read_lock_pm_lock();
ret = nvkms_ioctl_common(popen,
cmd,
(NvU64)(NvUPtr)params_address, param_size) == 0;
(NvU64)(NvUPtr)params_address, param_size,
NV_FALSE /* interruptible */) == 0;
nvkms_read_unlock_pm_lock();
return ret;
@@ -1504,9 +1725,7 @@ static size_t nvkms_config_file_open
struct inode *file_inode;
size_t file_size = 0;
size_t read_size = 0;
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
loff_t pos = 0;
#endif
*buff = NULL;
@@ -1549,14 +1768,8 @@ static size_t nvkms_config_file_open
* kernel_read_file for kernels >= 4.6
*/
while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
ssize_t ret = kernel_read(file, *buff + read_size,
file_size - read_size, &pos);
#else
ssize_t ret = kernel_read(file, read_size,
*buff + read_size,
file_size - read_size);
#endif
if (ret <= 0) {
break;
}
@@ -1677,7 +1890,10 @@ static int nvkms_open(struct inode *inode, struct file *filp)
}
filp->private_data =
nvkms_open_common(NVKMS_CLIENT_USER_SPACE, NULL, &status);
nvkms_open_common(NVKMS_CLIENT_USER_SPACE,
NULL,
NV_TRUE /* interruptible */,
&status);
nvkms_read_unlock_pm_lock();
@@ -1736,7 +1952,8 @@ static int nvkms_ioctl(struct inode *inode, struct file *filp,
status = nvkms_ioctl_common(popen,
params.cmd,
params.address,
params.size);
params.size,
NV_TRUE /* interruptible */);
nvkms_read_unlock_pm_lock();
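The interruptible flag threaded through nvkms_open_common() and nvkms_ioctl_common() above selects how nvkms_lock is taken. A minimal sketch of just that pattern, assuming the same nvkms_lock semaphore; the helper name is hypothetical:

    static int example_grab_nvkms_lock(NvBool interruptible)
    {
        if (interruptible) {
            /* User-space ioctl paths: a pending signal aborts the wait. */
            return down_interruptible(&nvkms_lock); /* 0 or -EINTR */
        }

        /* In-kernel (kapi) callers: block unconditionally, since they
         * cannot surface -EINTR to a user. */
        down(&nvkms_lock);
        return 0;
    }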

View File

@@ -94,14 +94,10 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -132,11 +132,16 @@ struct nvidia_p2p_page {
} registers;
} nvidia_p2p_page_t;
#define NVIDIA_P2P_PAGE_TABLE_VERSION 0x00010002
#define NVIDIA_P2P_PAGE_TABLE_VERSION 0x00020000
#define NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(p) \
NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_PAGE_TABLE_VERSION)
/*
* Page Table Flags
*/
#define NVIDIA_P2P_PAGE_TABLE_FLAGS_CPU_CACHEABLE 0x1
typedef
struct nvidia_p2p_page_table {
uint32_t version;
@@ -144,6 +149,7 @@ struct nvidia_p2p_page_table {
struct nvidia_p2p_page **pages;
uint32_t entries;
uint8_t *gpu_uuid;
uint32_t flags;
} nvidia_p2p_page_table_t;
/*
@@ -153,6 +159,9 @@ struct nvidia_p2p_page_table {
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
* This API does not support Coherent Driver-based Memory Management (CDMM) mode.
* CDMM allows coherent GPU memory to be managed by the driver and not the OS.
* This is done by the driver not onlining the memory as a NUMA node.
*
* This API may sleep.
*
@@ -201,7 +210,7 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
* accessible to a third-party device. The pages will persist until
* explicitly freed by nvidia_p2p_put_pages_persistent().
*
* Persistent GPU memory mappings are not supported on PowerPC,
* Persistent GPU memory mappings are not supported on
* MIG-enabled devices and vGPU.
*
* This API only supports pinned, GPU-resident memory, such as that provided

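A hedged consumer-side sketch of the page-table fields defined above; "pt" stands for an nvidia_p2p_page_table_t pointer already returned by nvidia_p2p_get_pages() or nvidia_p2p_get_pages_persistent(), and the error return value is illustrative only:

    /* Reject tables with an incompatible layout; the major version bump above
     * (0x00010002 -> 0x00020000) presumably reflects the added 'flags' field. */
    if (!NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(pt)) {
        return -EINVAL;
    }

    if (pt->flags & NVIDIA_P2P_PAGE_TABLE_FLAGS_CPU_CACHEABLE) {
        /* Pages are CPU-cacheable; choose mapping attributes accordingly. */
    }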
View File

@@ -242,6 +242,61 @@ err:
return 0;
}
/* acquire return code: 1 mine, 0 - not mine */
static int nv_mem_acquire_nc(unsigned long addr, size_t size, void *peer_mem_private_data,
char *peer_mem_name, void **client_context)
{
int ret = 0;
struct nv_mem_context *nv_mem_context;
nv_mem_context = kzalloc(sizeof *nv_mem_context, GFP_KERNEL);
if (!nv_mem_context)
/* Error case handled as not mine */
return 0;
nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;
#ifdef NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
ret = nvidia_p2p_get_pages_persistent(nv_mem_context->page_virt_start,
nv_mem_context->mapped_size,
&nv_mem_context->page_table, 0);
#else
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, NULL, NULL);
#endif
if (ret < 0)
goto err;
#ifdef NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
ret = nvidia_p2p_put_pages_persistent(nv_mem_context->page_virt_start,
nv_mem_context->page_table, 0);
#else
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
nv_mem_context->page_table);
#endif
if (ret < 0) {
peer_err("nv_mem_acquire -- error %d while calling nvidia_p2p_put_pages()\n", ret);
goto err;
}
/* 1 means mine */
*client_context = nv_mem_context;
__module_get(THIS_MODULE);
return 1;
err:
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
/* Error case handled as not mine */
return 0;
}
static int nv_dma_map(struct sg_table *sg_head, void *context,
struct device *dma_device, int dmasync,
int *nmap)
@@ -477,7 +532,7 @@ static int nv_mem_get_pages_nc(unsigned long addr,
}
static struct peer_memory_client nv_mem_client_nc = {
.acquire = nv_mem_acquire,
.acquire = nv_mem_acquire_nc,
.get_pages = nv_mem_get_pages_nc,
.dma_map = nv_dma_map,
.dma_unmap = nv_dma_unmap,

View File

@@ -69,6 +69,8 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_D_OPERATION 31:27
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE 0x00000011
#define NVC96F_SEM_ADDR_LO (0x0000005c)
#define NVC96F_SEM_ADDR_LO_OFFSET 31:2

View File

@@ -534,9 +534,6 @@
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */

View File

@@ -29,12 +29,7 @@
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/mm.h>
#if defined(NV_LINUX_BUG_H_PRESENT)
#include <linux/bug.h>
#else
#include <asm/bug.h>
#endif
// Today's implementation is a little simpler and more limited than the
// API description allows for in nv-kthread-q.h. Details include:

View File

@@ -48,34 +48,23 @@ $(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += page_pgmap
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -322,41 +322,23 @@ static void uvm_destroy_vma_semaphore_pool(struct vm_area_struct *vma)
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
}
// If a fault handler is not set, paths like handle_pte_fault in older kernels
// assume the memory is anonymous. That would make debugging this failure harder
// so we force it to fail instead.
static vm_fault_t uvm_vm_fault_sigbus(struct vm_area_struct *vma, struct vm_fault *vmf)
// The kernel will also deliver SIGBUS for faults on vmas with valid ops but no
// fault handler, but it didn't always do that. Make it explicit here so we
// don't rely on the kernel's implementation.
static vm_fault_t uvm_vm_fault_sigbus(struct vm_fault *vmf)
{
UVM_DBG_PRINT_RL("Fault to address 0x%lx in disabled vma\n", nv_page_fault_va(vmf));
UVM_DBG_PRINT_RL("Fault to address 0x%lx in disabled vma\n", vmf->address);
return VM_FAULT_SIGBUS;
}
static vm_fault_t uvm_vm_fault_sigbus_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
static vm_fault_t uvm_vm_fault_sigbus_entry(struct vm_fault *vmf)
{
UVM_ENTRY_RET(uvm_vm_fault_sigbus(vma, vmf));
}
static vm_fault_t uvm_vm_fault_sigbus_wrapper(struct vm_fault *vmf)
{
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
return uvm_vm_fault_sigbus(vmf->vma, vmf);
#else
return uvm_vm_fault_sigbus(NULL, vmf);
#endif
}
static vm_fault_t uvm_vm_fault_sigbus_wrapper_entry(struct vm_fault *vmf)
{
UVM_ENTRY_RET(uvm_vm_fault_sigbus_wrapper(vmf));
UVM_ENTRY_RET(uvm_vm_fault_sigbus(vmf));
}
static struct vm_operations_struct uvm_vm_ops_disabled =
{
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
.fault = uvm_vm_fault_sigbus_wrapper_entry
#else
.fault = uvm_vm_fault_sigbus_entry
#endif
.fault = uvm_vm_fault_sigbus_entry,
};
static void uvm_disable_vma(struct vm_area_struct *vma)
@@ -573,44 +555,23 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
UVM_ENTRY_VOID(uvm_vm_close_managed(vma));
}
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
static vm_fault_t uvm_vm_fault(struct vm_fault *vmf)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
uvm_va_space_t *va_space = uvm_va_space_get(vmf->vma->vm_file);
return uvm_va_space_cpu_fault_managed(va_space, vmf);
}
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
static vm_fault_t uvm_vm_fault_entry(struct vm_fault *vmf)
{
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
}
static vm_fault_t uvm_vm_fault_wrapper(struct vm_fault *vmf)
{
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
return uvm_vm_fault(vmf->vma, vmf);
#else
return uvm_vm_fault(NULL, vmf);
#endif
}
static vm_fault_t uvm_vm_fault_wrapper_entry(struct vm_fault *vmf)
{
UVM_ENTRY_RET(uvm_vm_fault_wrapper(vmf));
UVM_ENTRY_RET(uvm_vm_fault(vmf));
}
static struct vm_operations_struct uvm_vm_ops_managed =
{
.open = uvm_vm_open_managed_entry,
.close = uvm_vm_close_managed_entry,
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
.fault = uvm_vm_fault_wrapper_entry,
.page_mkwrite = uvm_vm_fault_wrapper_entry,
#else
.fault = uvm_vm_fault_entry,
.page_mkwrite = uvm_vm_fault_entry,
#endif
};
// vm operations on semaphore pool allocations only control CPU mappings. Unmapping GPUs,
@@ -706,12 +667,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
{
.open = uvm_vm_open_semaphore_pool_entry,
.close = uvm_vm_close_semaphore_pool_entry,
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
.fault = uvm_vm_fault_sigbus_wrapper_entry,
#else
.fault = uvm_vm_fault_sigbus_entry,
#endif
};
static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
@@ -753,7 +709,7 @@ static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
origin_vma->vm_private_data = NULL;
origin_vma->vm_ops = &uvm_vm_ops_disabled;
vma->vm_ops = &uvm_vm_ops_disabled;
unmap_mapping_range(va_space->mapping, va_range->node.start, va_range->node.end - va_range->node.start + 1, 1);
unmap_mapping_range(va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
}
uvm_va_space_up_write(va_space);
@@ -781,14 +737,25 @@ static struct vm_operations_struct uvm_vm_ops_device_p2p =
{
.open = uvm_vm_open_device_p2p_entry,
.close = uvm_vm_close_device_p2p_entry,
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
.fault = uvm_vm_fault_sigbus_wrapper_entry,
#else
.fault = uvm_vm_fault_sigbus_entry,
#endif
};
static bool va_range_type_expects_mmap(uvm_va_range_type_t type)
{
switch (type) {
case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
case UVM_VA_RANGE_TYPE_DEVICE_P2P:
return true;
// Although UVM_VA_RANGE_TYPE_MANAGED does support mmap, it doesn't
// expect mmap to be called on a pre-existing range. mmap itself creates
// the managed va range.
default:
return false;
}
}
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
void *fd_type_ptr;
@@ -891,28 +858,38 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
status = uvm_va_range_create_mmap(va_space, current->mm, vma->vm_private_data, NULL);
if (status == NV_ERR_UVM_ADDRESS_IN_USE) {
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
uvm_va_range_device_p2p_t *device_p2p_range;
// If the mmap is for a semaphore pool, the VA range will have been
// allocated by a previous ioctl, and the mmap just creates the CPU
// mapping.
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
device_p2p_range = uvm_va_range_device_p2p_find(va_space, vma->vm_start);
if (semaphore_pool_range && semaphore_pool_range->va_range.node.start == vma->vm_start &&
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end) {
uvm_va_range_t *existing_range = uvm_va_range_find(va_space, vma->vm_start);
// Does the existing range exactly match the vma and expect mmap?
if (existing_range &&
existing_range->node.start == vma->vm_start &&
existing_range->node.end + 1 == vma->vm_end &&
va_range_type_expects_mmap(existing_range->type)) {
// We speculatively initialized the managed vma before checking for
// collisions because we expect successful insertion to be the
// common case. Undo that.
uvm_vma_wrapper_destroy(vma->vm_private_data);
vma_wrapper_allocated = false;
vma->vm_private_data = vma;
switch (existing_range->type) {
case UVM_VA_RANGE_TYPE_SEMAPHORE_POOL:
vma->vm_ops = &uvm_vm_ops_semaphore_pool;
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, vma);
}
else if (device_p2p_range && device_p2p_range->va_range.node.start == vma->vm_start &&
device_p2p_range->va_range.node.end + 1 == vma->vm_end) {
uvm_vma_wrapper_destroy(vma->vm_private_data);
vma_wrapper_allocated = false;
vma->vm_private_data = vma;
status = uvm_mem_map_cpu_user(uvm_va_range_to_semaphore_pool(existing_range)->mem, va_space, vma);
break;
case UVM_VA_RANGE_TYPE_DEVICE_P2P:
vma->vm_ops = &uvm_vm_ops_device_p2p;
status = uvm_va_range_device_p2p_map_cpu(va_space, vma, device_p2p_range);
status = uvm_va_range_device_p2p_map_cpu(va_space,
vma,
uvm_va_range_to_device_p2p(existing_range));
break;
default:
UVM_ASSERT(0);
break;
}
}
}
@@ -1039,6 +1016,7 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_ALLOC_INIT_CHECK(UVM_MAP_EXTERNAL_ALLOCATION, uvm_api_map_external_allocation);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_MAP_EXTERNAL_SPARSE, uvm_api_map_external_sparse);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_FREE, uvm_api_free);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_DISCARD, uvm_api_discard);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PREVENT_MIGRATION_RANGE_GROUPS, uvm_api_prevent_migration_range_groups);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_ALLOW_MIGRATION_RANGE_GROUPS, uvm_api_allow_migration_range_groups);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_SET_PREFERRED_LOCATION, uvm_api_set_preferred_location);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2024 NVIDIA Corporation
Copyright (c) 2013-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -1456,17 +1456,19 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
// they will not be migrated.
//
// If the input virtual range corresponds to system-allocated pageable memory,
// and UvmIsPageableMemoryAccessSupported reports that pageable memory access
// is supported, then the driver will populate any unpopulated pages at the
// and UvmIsPageableMemoryAccessSupported reports that pageable memory access is
// supported, then the driver will populate any unpopulated pages at the
// destination processor and migrate the data from any source location to the
// destination. Pages in the VA range are migrated even if their preferred
// location is set to a processor other than the destination processor.
// If the accessed-by list of any of the pages in the VA range is not empty,
// then mappings to those pages from all the appropriate processors are updated
// to refer to the new location if establishing such a mapping is possible.
// Otherwise, those mappings are cleared.
// Note that in this case, software managed pageable memory does not support
// migration of MAP_SHARED, file-backed, or PROT_NONE mappings.
// destination. On coherent platforms, if the destination processor is a GPU and
// the GPU is not a NUMA node, pages will be populated on the closest CPU NUMA
// node if they are not already populated anywhere. Pages in the VA range are
// migrated even if their preferred location is set to a processor other than
// the destination processor. If the accessed-by list of any of the pages in the
// VA range is not empty, then mappings to those pages from all the appropriate
// processors are updated to refer to the new location if establishing such a
// mapping is possible. Otherwise, those mappings are cleared. Note that in this
// case, software managed pageable memory does not support migration of
// MAP_SHARED, file-backed, or PROT_NONE mappings.
//
// If any pages in the given VA range are associated with a range group which
// has been made non-migratable via UvmPreventMigrationRangeGroups, then those
@@ -2124,11 +2126,6 @@ NV_STATUS UvmUnmapExternal(void *base,
NvLength length,
const NvProcessorUuid *gpuUuid);
// TODO: Bug 2732305: Remove this declaration when the new external APIs have
// been implemented.
NV_STATUS UvmUnmapExternalAllocation(void *base,
const NvProcessorUuid *gpuUuid);
//------------------------------------------------------------------------------
// UvmMapDynamicParallelismRegion
//
@@ -2359,6 +2356,11 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// not cause a migration if a mapping for that page from that processor can be
// established without migrating the page.
//
// If the specified processor is a GPU and the GPU is not a NUMA node and the
// input range is system-allocated pageable memory and the system supports
// transparent access to pageable memory, preferred location will be set to the
// closest CPU NUMA node.
//
// When a page that was allocated via either UvmAlloc or UvmMemMap migrates away
// from its preferred location, the mapping on the preferred location's
// processor is cleared so that the next access from that processor will cause a
@@ -2675,6 +2677,111 @@ NV_STATUS UvmUnsetAccessedBy(void *base,
NvLength length,
const NvProcessorUuid *accessedByUuid);
//------------------------------------------------------------------------------
// UvmDiscard
//
// Inform the UVM driver that the data in the specified virtual address range
// is no longer needed.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
// supported by the specified processor.
//
// The virtual address range specified by (base, length) must have been
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory.
//
// If the input virtual range corresponds to system-allocated pageable memory,
// and there is at least one GPU in the system that supports transparent access
// to pageable memory, the behavior described in the next paragraphs does not
// take effect.
//
// When a virtual address range is discarded using this API, any
// read-duplicated copies are collapsed. UvmDiscard will not cause the
// immediate migration of any pages covered by the specified virtual address
// range.
//
// A discard operation done without the flag UVM_DISCARD_FLAGS_UNMAP will be
// faster than an operation with that flag, but clearing the discard status can
// only be done with a migration API call as described below.
//
// After a page has been discarded, reads and writes to/from discarded memory
// have the following behavior:
//
// * Reads and writes to/from memory discarded without
// UVM_DISCARD_FLAGS_UNMAP have undefined behavior.
// * Reads and writes to/from memory discarded without
// UVM_DISCARD_FLAGS_UNMAP will not clear the discard status.
// * Reads from memory discarded with UVM_DISCARD_FLAGS_UNMAP, including
// back-to-back reads, return indeterminate data.
// * Writes to memory discarded with UVM_DISCARD_FLAGS_UNMAP
// - are guaranteed to land and remove the discard status of the virtual
// address,
//     - guarantee that any reads after the write observe the written data,
// - will collapse any read-duplicated copies of the virtual address.
//
// UvmDiscard can happen concurrently with memory migrations. Therefore, the
// caller is responsible for serializing page accesses and/or migrations and
// discard operations.
//
// Calls to UvmMigrate, UvmMigrateAsync, UvmPreventMigrationRangeGroups, or
// UvmMigrateRangeGroup, on a discarded virtual address range will cause the
// collapsing of any discarded read-duplicated pages and will clear the discard
// status of all discarded pages in the migrated VA range.
//
// If discarded memory is evicted, the data in the pages backing the evicted
// virtual range is not copied to the eviction destination and the discard
// status of the evicted pages is not cleared. Pages that have been discarded
// will be selected for eviction before pages that require copies during
// eviction.
//
// If UvmEnableReadDuplication/UvmDisableReadDuplication is called on a
// discarded virtual address range, the discard status of the range is not
// cleared.
//
// If discarded pages are in a processor's accessed-by list, mappings to those
// pages are not guaranteed to be created on that processor while the pages are
// discarded.
//
// If UvmSetPreferredLocation is called on a discarded virtual address range,
// data from the pages backing the range will not be copied if those pages
// require migration as a result of setting the preferred location.
//
// Calling this API while a non-fault-capable GPU is registered will result in
// the API returning an error. If a non-fault-capable GPU is registered after
// pages have been discarded, UvmPreventMigrationRangeGroups will clear the
// discard status of the range.
//
// Arguments:
// base: (INPUT)
// Base address of the virtual address range.
//
// length: (INPUT)
// Length, in bytes, of the range.
//
// flags: (INPUT)
// UvmDiscard operation flags.
// UVM_DISCARD_FLAGS_UNMAP: The discarded pages will be unmapped.
// This allows future writes to remove the discard status.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
// base and length are not properly aligned, the range does not
// represent a valid UVM allocation, or the range is not covered by a
// single UVM managed VA range.
//
// NV_ERR_INVALID_ARGUMENT:
// The flags parameter is not valid.
//
// NV_ERR_INVALID_DEVICE:
// At least one non-fault-capable GPU has a registered VA space.
//
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDiscard(void *base, NvLength length, NvU64 flags);
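//
// A minimal usage sketch (illustrative only, not part of the API contract):
// discarding a scratch region whose contents are no longer needed, assuming
// base/length describe a range previously created with UvmAlloc or UvmMemMap.
// Only the UvmDiscard signature and the UVM_DISCARD_FLAGS_UNMAP flag described
// above are used; the helper name is hypothetical.
//
//     static NV_STATUS discard_scratch(void *base, NvLength length)
//     {
//         // With UVM_DISCARD_FLAGS_UNMAP, a later write both lands and
//         // clears the discard status of the written address.
//         NV_STATUS status = UvmDiscard(base, length, UVM_DISCARD_FLAGS_UNMAP);
//         if (status != NV_OK)
//             return status;
//
//         // A subsequent UvmMigrate/UvmMigrateAsync on this range would also
//         // clear the discard status, per the description above.
//         return NV_OK;
//     }
//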
//------------------------------------------------------------------------------
// UvmEnableSystemWideAtomics
//

View File

@@ -94,7 +94,7 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -103,7 +103,7 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -32,7 +32,7 @@
#include "uvm_thread_context.h"
#include "uvm_kvmalloc.h"
#include "uvm_va_space.h"
#include "nv_uvm_types.h"
#include "nv_uvm_user_types.h"
// This weird number comes from UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS. That
// ioctl is called frequently so we don't want to allocate a copy every time.
@@ -212,6 +212,10 @@ typedef enum
// If the interval [base, base + length) is fully covered by VMAs which all have
// the same uvm_api_range_type_t, that range type is returned.
//
// If the platform supports ATS, no GPU has yet been registered, and a possible
// ATS range is identified, then the VA space state is updated such that only
// GPUs that support ATS can subsequently be registered.
//
// LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
// mm->mmap_lock must also be held in at least read mode.
uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
@@ -228,6 +232,7 @@ NV_STATUS uvm_api_create_external_range(UVM_CREATE_EXTERNAL_RANGE_PARAMS *params
NV_STATUS uvm_api_map_external_allocation(UVM_MAP_EXTERNAL_ALLOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_external_sparse(UVM_MAP_EXTERNAL_SPARSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_free(UVM_FREE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_discard(UVM_DISCARD_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_prevent_migration_range_groups(UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_allow_migration_range_groups(UVM_ALLOW_MIGRATION_RANGE_GROUPS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_preferred_location(const UVM_SET_PREFERRED_LOCATION_PARAMS *params, struct file *filp);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,7 +44,7 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (g_uvm_global.ats.enabled) {
if (g_uvm_global.ats.enabled && uvm_parent_gpu_supports_ats(parent_gpu)) {
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
return uvm_ats_sva_add_gpu(parent_gpu);
}
@@ -54,7 +54,7 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (g_uvm_global.ats.enabled) {
if (g_uvm_global.ats.enabled && uvm_parent_gpu_supports_ats(parent_gpu)) {
UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
uvm_ats_sva_remove_gpu(parent_gpu);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -32,6 +32,27 @@
#define UVM_ATS_SUPPORTED() UVM_ATS_SVA_SUPPORTED()
typedef enum
{
// The VA space has been initialized on a system that supports ATS, but it is
// not yet known whether it will hold ATS capable GPUs.
UVM_ATS_VA_SPACE_ATS_UNSET = 0,
// The VA space only allows registering GPUs that support ATS. This state is
// entered on a system that supports ATS by:
// - Registering an ATS capable GPU
// - Migrating or setting a policy on pageable memory
// Once entered, this state is never left.
UVM_ATS_VA_SPACE_ATS_SUPPORTED,
// The VA space only allows registering GPUs that do not support ATS. This
// state is entered by:
// - Initialization on platforms that do not support ATS
// - Registering a non ATS capable GPU on platforms that do support ATS
// Once entered, this state is never left.
UVM_ATS_VA_SPACE_ATS_UNSUPPORTED
} uvm_ats_va_space_state_t;
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
@@ -43,6 +64,19 @@ typedef struct
uvm_rw_semaphore_t lock;
uvm_sva_va_space_t sva;
// Tracks whether ATS is supported in this va_space. The state is set during
// GPU registration or some ATS related calls, and is protected as an atomic.
// This is because during some ATS related API calls a VA space can
// transition from UNSET to ATS_SUPPORTED while the va_space's semaphore is
// only held in a read-locked state. A race can result in duplicate writes of
// ATS_SUPPORTED to the state value. This is OK as the only possible
// transition here is
// UVM_ATS_VA_SPACE_ATS_UNSET -> UVM_ATS_VA_SPACE_ATS_SUPPORTED.
// Entering UVM_ATS_VA_SPACE_ATS_UNSUPPORTED requires registering a
// GPU in the VA space which must hold the lock in write mode.
// Enums from uvm_ats_va_space_state_t are stored in this atomic_t value.
atomic_t state;
} uvm_ats_va_space_t;
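// A minimal sketch (hypothetical helper, not part of this change) of the
// lock-free UNSET -> ATS_SUPPORTED transition described above. Because that is
// the only transition permitted while the va_space lock is held in read mode,
// a compare-and-swap suffices and a lost race merely re-writes the same value.
//
//     static void uvm_ats_va_space_mark_supported(uvm_ats_va_space_t *ats)
//     {
//         atomic_cmpxchg(&ats->state,
//                        UVM_ATS_VA_SPACE_ATS_UNSET,
//                        UVM_ATS_VA_SPACE_ATS_SUPPORTED);
//     }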
typedef struct

View File

@@ -22,6 +22,7 @@
#include "uvm_api.h"
#include "uvm_tools.h"
#include "uvm_va_block_types.h"
#include "uvm_va_range.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
@@ -89,6 +90,15 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
// If we're faulting, let the GPU access special vmas
uvm_migrate_args.populate_flags |= UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL;
if (ats_context->client_type == UVM_FAULT_CLIENT_TYPE_GPC)
uvm_migrate_args.cause = UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT;
else
uvm_migrate_args.cause = UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT;
}
else {
uvm_migrate_args.cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
uvm_migrate_args.access_counters_buffer_index = ats_context->access_counters.buffer_index;
}
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
@@ -133,23 +143,28 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
int residency;
bool cdmm_enabled = gpu->mem_info.cdmm_enabled;
if (gpu->parent->is_integrated_gpu || cdmm_enabled) {
residency = gpu->parent->closest_cpu_numa_node;
}
else {
UVM_ASSERT(gpu->mem_info.numa.enabled);
residency = uvm_gpu_numa_node(gpu);
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
struct mempolicy *vma_policy = vma_policy(vma);
unsigned short mode;
}
ats_context->prefetch_state.has_preferred_location = false;
// It's safe to read vma_policy since the mmap_lock is held in at least read
// mode in this path.
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
{
struct mempolicy *vma_policy = vma_policy(vma);
unsigned short mode;
// It's safe to read vma_policy since the mmap_lock is held in at least
// read mode in this path.
uvm_assert_mmap_lock_locked(vma->vm_mm);
if (!vma_policy)
goto done;
if (vma_policy) {
mode = vma_policy->mode;
if ((mode == MPOL_BIND)
@@ -164,17 +179,19 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
home_node = vma_policy->home_node;
#endif
// Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
// in the list of preferred nodes, else prefer the closest_cpu_numa_node
// to the GPU if closest_cpu_numa_node is in the list of preferred
// nodes. Fallback to the faulting GPU if all else fails.
// Prefer home_node if set. Otherwise, prefer the faulting GPU
// if it's in the list of preferred nodes, else prefer the
// closest_cpu_numa_node to the GPU if closest_cpu_numa_node is
// in the list of preferred nodes. Fallback to the faulting GPU
// if all else fails.
if (home_node != NUMA_NO_NODE) {
residency = home_node;
}
else if (!node_isset(residency, vma_policy->nodes)) {
int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
if ((closest_cpu_numa_node != NUMA_NO_NODE) &&
node_isset(closest_cpu_numa_node, vma_policy->nodes))
residency = gpu->parent->closest_cpu_numa_node;
else
residency = first_node(vma_policy->nodes);
@@ -188,12 +205,11 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
if (residency != uvm_gpu_numa_node(gpu))
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
done:
#else
ats_context->prefetch_state.has_preferred_location = false;
}
}
#endif
ats_context->residency_id = gpu ? gpu->id : UVM_ID_CPU;
ats_context->residency_id = gpu && !gpu->parent->is_integrated_gpu && !cdmm_enabled ? gpu->id : UVM_ID_CPU;
ats_context->residency_node = residency;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2023 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -164,7 +164,7 @@ static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
if (!smmu_cmdqv_base)
return NV_ERR_NO_MEMORY;
parent_gpu->smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
parent_gpu->ats.smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
cmdqv_config = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CONFIG);
if (!(cmdqv_config & SMMU_CMDQV_CONFIG_CMDQV_EN)) {
status = NV_ERR_OBJECT_NOT_FOUND;
@@ -172,8 +172,8 @@ static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
}
// Allocate SMMU CMDQ pages for WAR
parent_gpu->smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
if (!parent_gpu->smmu_war.smmu_cmdq) {
parent_gpu->ats.smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
if (!parent_gpu->ats.smmu_war.smmu_cmdq) {
status = NV_ERR_NO_MEMORY;
goto out;
}
@@ -187,42 +187,42 @@ static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
page_to_phys(parent_gpu->ats.smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_PROD, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, SMMU_VCMDQ_CONFIG_ENABLE);
UVM_SPIN_WHILE(!(smmu_vcmdq_read32(smmu_cmdqv_base, SMMU_VCMDQ_STATUS) & SMMU_VCMDQ_STATUS_ENABLED), &spin);
uvm_mutex_init(&parent_gpu->smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
parent_gpu->smmu_war.smmu_prod = 0;
parent_gpu->smmu_war.smmu_cons = 0;
uvm_mutex_init(&parent_gpu->ats.smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
parent_gpu->ats.smmu_war.smmu_prod = 0;
parent_gpu->ats.smmu_war.smmu_cons = 0;
return NV_OK;
out:
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
parent_gpu->smmu_war.smmu_cmdqv_base = NULL;
iounmap(parent_gpu->ats.smmu_war.smmu_cmdqv_base);
parent_gpu->ats.smmu_war.smmu_cmdqv_base = NULL;
return status;
}
static void uvm_ats_smmu_war_deinit(uvm_parent_gpu_t *parent_gpu)
{
void __iomem *smmu_cmdqv_base = parent_gpu->smmu_war.smmu_cmdqv_base;
void __iomem *smmu_cmdqv_base = parent_gpu->ats.smmu_war.smmu_cmdqv_base;
NvU32 cmdq_alloc_map;
if (parent_gpu->smmu_war.smmu_cmdqv_base) {
if (parent_gpu->ats.smmu_war.smmu_cmdqv_base) {
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, 0);
cmdq_alloc_map = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
iowrite32(cmdq_alloc_map & SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC, smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, 0);
}
if (parent_gpu->smmu_war.smmu_cmdq)
__free_page(parent_gpu->smmu_war.smmu_cmdq);
if (parent_gpu->ats.smmu_war.smmu_cmdq)
__free_page(parent_gpu->ats.smmu_war.smmu_cmdq);
if (parent_gpu->smmu_war.smmu_cmdqv_base)
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
if (parent_gpu->ats.smmu_war.smmu_cmdqv_base)
iounmap(parent_gpu->ats.smmu_war.smmu_cmdqv_base);
}
// The SMMU on ARM64 can run under different translation regimes depending on
@@ -253,13 +253,13 @@ void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr,
uvm_spin_loop_t spin;
NvU16 asid;
if (!parent_gpu->smmu_war.smmu_cmdqv_base)
if (!parent_gpu->ats.smmu_war.smmu_cmdqv_base)
return;
asid = arm64_mm_context_get(mm);
vcmdq = kmap(parent_gpu->smmu_war.smmu_cmdq);
uvm_mutex_lock(&parent_gpu->smmu_war.smmu_lock);
vcmdq_prod = parent_gpu->smmu_war.smmu_prod;
vcmdq = kmap(parent_gpu->ats.smmu_war.smmu_cmdq);
uvm_mutex_lock(&parent_gpu->ats.smmu_war.smmu_lock);
vcmdq_prod = parent_gpu->ats.smmu_war.smmu_prod;
// Our queue management is very simple. The mutex prevents multiple
// producers writing to the queue and all our commands require waiting for
@@ -293,28 +293,21 @@ void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr,
// MSB is the wrap bit
vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1;
parent_gpu->smmu_war.smmu_prod = vcmdq_prod;
smmu_vcmdq_write32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->smmu_war.smmu_prod);
parent_gpu->ats.smmu_war.smmu_prod = vcmdq_prod;
smmu_vcmdq_write32(parent_gpu->ats.smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->ats.smmu_war.smmu_prod);
UVM_SPIN_WHILE(
(smmu_vcmdq_read32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
(smmu_vcmdq_read32(parent_gpu->ats.smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
&spin);
uvm_mutex_unlock(&parent_gpu->smmu_war.smmu_lock);
kunmap(parent_gpu->smmu_war.smmu_cmdq);
uvm_mutex_unlock(&parent_gpu->ats.smmu_war.smmu_lock);
kunmap(parent_gpu->ats.smmu_war.smmu_cmdq);
arm64_mm_context_put(mm);
}
#endif
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_enable_feature
int ret;
ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
if (ret)
return errno_to_nv_status(ret);
#endif
if (UVM_ATS_SMMU_WAR_REQUIRED())
return uvm_ats_smmu_war_init(parent_gpu);
else
@@ -325,10 +318,6 @@ void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_SMMU_WAR_REQUIRED())
uvm_ats_smmu_war_deinit(parent_gpu);
#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_disable_feature
iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
#endif
}
NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2022-2024 NVIDIA Corporation
Copyright (c) 2022-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -97,7 +97,45 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = true;
parent_gpu->ats.no_ats_range_required = true;
// Blackwell has a physical translation prefetcher, meaning SW must assume
// that any physical ATS translation can be fetched at any time. The
// specific behavior and impact differs with non-PASID ATS support, but
// generally this can result in the GPU accessing a stale invalid physical
// ATS translation after transitioning an IOMMU mapping from invalid to
// valid, or in other words at dma_map_page() time.
//
// These cases could result in faults. That can happen with virtual accesses
// too, but that's fine since we'll just handle and replay them. But
// physical access IOMMU faults are considered globally fatal, so we must
// avoid them.
if (parent_gpu->ats.non_pasid_ats_enabled) {
// On Blackwell GPUs that have non-PASID ATS, physical translations that
// come back as 4K will not be cached. Therefore we don't have to worry
// about invalidating them, and we don't have to worry about the cross-
// contamination issue within a 64K region described in the case below.
// However, we do need to ensure that all pending prefetch requests to
// the old invalid translation are complete before we access the new
// valid translation, so we need to flush pending translations.
parent_gpu->ats.dma_map_invalidation = UVM_DMA_MAP_INVALIDATION_FLUSH;
}
else if (g_uvm_global.ats.enabled && PAGE_SIZE == UVM_PAGE_SIZE_64K) {
// If the page size is 64K here in the guest, it's possible for the host
// page size to be 4K, which we can't know here, and that could cause
// new mappings to hit old stale invalid entries that aren't
// automatically refetched by HW (see the comments on 4K support in
// uvm_ats_service_faults_region()).
//
// Even with 64K only it's possible for the prefetcher to race with a
// legitimate access in the same 64KB region and cause the legitimate
// access to see an invalid translation, so we need an invalidate to
// flush those out too.
//
// If the guest page size is 4K we don't enable physical ATS so we don't
// have an issue.
parent_gpu->ats.dma_map_invalidation = UVM_DMA_MAP_INVALIDATION_FULL;
}
parent_gpu->conf_computing.per_channel_key_rotation = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2024 NVIDIA Corporation
Copyright (c) 2024-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -176,6 +176,26 @@ void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
void uvm_hal_blackwell_host_tlb_invalidate_phys(uvm_push_t *push)
{
// There is no specific mechanism to invalidate only physical translations,
// nor to invalidate per-address physical translations. The only mechanism is to
// invalidate all PDBs and all VAs, which will also wipe out all physical
// entries for the GFID.
//
// NVLINK and GPC clients do not make physical requests, so their TLBs
// cannot cache GPAs and we can skip them in the invalidate.
NV_PUSH_4U(C96F, MEM_OP_A, HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ALL) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, NONE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, ALL),
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE));
}
void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
@@ -255,6 +275,50 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
}
}
void uvm_hal_blackwell_host_tlb_flush_prefetch(uvm_push_t *push)
{
// We don't need to invalidate lines, but we need the invalidate to not be
// ignored in order to flush out pending prefetch fills from SYS clients.
// That means invalidating a bound PDB with a dummy VA.
NvU32 aperture_value;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_gpu_phys_address_t pdb;
NvU32 va_lo = 0;
NvU32 va_hi = 0;
NvU32 pdb_lo;
NvU32 pdb_hi;
// There is no PDB at this point so we must invalidate the entire TLB.
if (!gpu->address_space_tree.root)
return uvm_hal_blackwell_host_tlb_invalidate_phys(push);
pdb = uvm_page_tree_pdb_address(&gpu->address_space_tree);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
NV_PUSH_4U(C96F, MEM_OP_A, HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C96F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, NONE) |
HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, PTE_ONLY) |
aperture_value |
HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo),
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
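// Illustrative sketch only (the helper name is hypothetical): how the two
// invalidate flavors above could map onto the per-GPU ats.dma_map_invalidation
// policy selected during arch property initialization. The real consumers
// live in the DMA mapping paths.
static void blackwell_example_dma_map_invalidate(uvm_push_t *push, uvm_parent_gpu_t *parent_gpu)
{
    switch (parent_gpu->ats.dma_map_invalidation) {
        case UVM_DMA_MAP_INVALIDATION_FLUSH:
            // 4K physical translations are not cached; only flush pending
            // prefetch fills.
            uvm_hal_blackwell_host_tlb_flush_prefetch(push);
            break;
        case UVM_DMA_MAP_INVALIDATION_FULL:
            // Stale invalid physical translations may be cached; wipe them.
            uvm_hal_blackwell_host_tlb_invalidate_phys(push);
            break;
        default:
            // No invalidation required on this GPU.
            break;
    }
}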
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
@@ -282,3 +346,24 @@ uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_g
{
return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}
// Host-specific L2 cache invalidate for non-coherent sysmem
void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
// First sysmembar
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
// Flush dirty
NV_PUSH_4U(C96F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
// Invalidate
NV_PUSH_4U(C96F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE));
// Final sysmembar
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
}
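// Hypothetical standalone usage of the L2 maintenance sequence above (the
// helper name and push description string are illustrative): wrap it in its
// own push on the GPU-internal channel and wait for completion.
static NV_STATUS blackwell_example_l2_invalidate_noncoh_sysmem_sync(uvm_gpu_t *gpu)
{
    uvm_push_t push;
    NV_STATUS status = uvm_push_begin(gpu->channel_manager,
                                      UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                      &push,
                                      "L2 non-coherent sysmem invalidate");
    if (status != NV_OK)
        return status;
    uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(&push);
    return uvm_push_end_and_wait(&push);
}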

View File

@@ -34,6 +34,7 @@
#include "uvm_kvmalloc.h"
#include "nv_uvm_types.h"
#include "nv_uvm_user_types.h"
#include "nv_uvm_interface.h"
#include "clb06f.h"
#include "uvm_conf_computing.h"
@@ -381,7 +382,9 @@ static void lock_channel_for_push(uvm_channel_t *channel)
__set_bit(index, channel->pool->conf_computing.push_locks);
}
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static bool test_claim_and_lock_channel(uvm_channel_t *channel,
NvU32 num_gpfifo_entries,
uvm_channel_reserve_type_t reserve_type)
{
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
uvm_channel_pool_assert_locked(channel->pool);
@@ -390,8 +393,7 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
if (uvm_channel_is_locked_for_push(channel))
return false;
// Confidential compute is not using p2p ops, reserve without p2p
if (try_claim_channel_locked(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P)) {
if (try_claim_channel_locked(channel, num_gpfifo_entries, reserve_type)) {
lock_channel_for_push(channel);
return true;
}
@@ -504,7 +506,9 @@ NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool)
// Reserve a channel in the specified pool. The channel is locked until the push
// ends
static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool,
uvm_channel_reserve_type_t reserve_type,
uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
@@ -533,8 +537,7 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
for_each_clear_bit(index, pool->conf_computing.push_locks, pool->num_channels) {
channel = &pool->channels[index];
// Confidential compute is not using p2p ops, reserve without p2p
if (try_claim_channel_locked(channel, 1, UVM_CHANNEL_RESERVE_NO_P2P)) {
if (try_claim_channel_locked(channel, 1, reserve_type)) {
lock_channel_for_push(channel);
goto done;
}
@@ -551,7 +554,7 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
channel_pool_lock(pool);
if (test_claim_and_lock_channel(channel, 1))
if (test_claim_and_lock_channel(channel, 1, reserve_type))
goto done;
channel_pool_unlock(pool);
@@ -562,6 +565,11 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
return status;
}
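// P2P work is suspended on this channel: release the reservation
// semaphore and ask the caller to retry once P2P is resumed.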
if (reserve_type == UVM_CHANNEL_RESERVE_WITH_P2P && channel->suspended_p2p) {
uvm_up(&pool->conf_computing.push_sem);
return NV_ERR_BUSY_RETRY;
}
UVM_SPIN_LOOP(&spin);
}
}
@@ -583,7 +591,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool,
UVM_ASSERT(pool);
if (g_uvm_global.conf_computing_enabled)
return channel_reserve_and_lock_in_pool(pool, channel_out);
return channel_reserve_and_lock_in_pool(pool, reserve_type, channel_out);
uvm_for_each_channel_in_pool(channel, pool) {
// TODO: Bug 1764953: Prefer idle/less busy channels
@@ -1799,7 +1807,9 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
return NV_OK;
}
static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel,
NvU32 num_gpfifo_entries,
uvm_channel_reserve_type_t reserve_type)
{
NV_STATUS status;
uvm_spin_loop_t spin;
@@ -1822,7 +1832,7 @@ static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfi
channel_pool_lock(pool);
if (test_claim_and_lock_channel(channel, num_gpfifo_entries))
if (test_claim_and_lock_channel(channel, num_gpfifo_entries, reserve_type))
goto out;
channel_pool_unlock(pool);
@@ -1833,7 +1843,7 @@ static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfi
channel_pool_lock(pool);
if (test_claim_and_lock_channel(channel, num_gpfifo_entries))
if (test_claim_and_lock_channel(channel, num_gpfifo_entries, reserve_type))
goto out;
channel_pool_unlock(pool);
@@ -1857,8 +1867,9 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
NV_STATUS status = NV_OK;
uvm_spin_loop_t spin;
// Direct channel reservations don't use p2p
if (g_uvm_global.conf_computing_enabled)
return channel_reserve_and_lock(channel, num_gpfifo_entries);
return channel_reserve_and_lock(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P);
// Direct channel reservations don't use p2p
if (try_claim_channel(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P))
@@ -3058,7 +3069,7 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
// Select the preferred CE for the given channel types.
static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
const UvmGpuCopyEngineCaps *ce_caps,
uvm_channel_type_t *channel_types,
const uvm_channel_type_t *channel_types,
unsigned num_channel_types,
unsigned *preferred_ce)
{
@@ -3067,7 +3078,7 @@ static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
for (i = 0; i < num_channel_types; ++i) {
unsigned ce;
unsigned best_ce = UVM_COPY_ENGINE_COUNT_MAX;
uvm_channel_type_t type = channel_types[i];
const uvm_channel_type_t type = channel_types[i];
for (ce = 0; ce < UVM_COPY_ENGINE_COUNT_MAX; ++ce) {
if (!ce_is_usable(ce_caps + ce))
@@ -3104,7 +3115,7 @@ static void pick_ces(uvm_channel_manager_t *manager, const UvmGpuCopyEngineCaps
// the usage count of each CE and it increases every time a CE
// is selected. MEMOPS has the least priority as it only cares about
// low usage of the CE to improve latency
uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
static const uvm_channel_type_t types[] = { UVM_CHANNEL_TYPE_CPU_TO_GPU,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
UVM_CHANNEL_TYPE_GPU_TO_GPU,
@@ -3123,7 +3134,7 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
uvm_gpu_t *gpu = manager->gpu;
// The WLC type must go last so an unused CE is chosen, if available
uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
static const uvm_channel_type_t types[] = { UVM_CHANNEL_TYPE_CPU_TO_GPU,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
UVM_CHANNEL_TYPE_MEMOPS,

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -393,6 +393,100 @@ done:
return status;
}
// Transition TLB-cached invalid IOVAs to valid and make sure the GPU sees the
// fresh mapping.
static NV_STATUS test_iommu_stale_invalid(uvm_gpu_t *gpu)
{
// Semi-arbitrary number. We want enough pages that the odds of two mappings
// landing on the same GPU TLB cache line are high.
size_t num_pages = 4 * 16;
uvm_mem_t **mem;
uvm_push_t push;
size_t i;
NV_STATUS status = NV_OK;
NvU64 pattern = jhash_2words(current->pid, uvm_id_value(gpu->id), 0xaa);
// This test allocates sysmem outside of the DMA API. Sysmem allocation
// without a DMA owner uses protected sysmem if Confidential Computing is
// enabled and cannot be accessed by the GPU.
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
mem = uvm_kvmalloc_zero(num_pages * sizeof(mem[0]));
TEST_CHECK_RET(mem != NULL);
// Allocate and map each page. Our assumption is that pages mapped near each
// other in time will tend to use similar IOVAs. Getting IOVAs which share a
// GPU TLB cache line is ideal, but even nearby cache lines are useful since
// they can engage the TLB prefetcher.
for (i = 0; i < num_pages; i++) {
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, NULL, &mem[i]), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(mem[i], gpu), out);
memset(uvm_mem_get_cpu_addr_kernel(mem[i]), 0, PAGE_SIZE);
}
// Now that all pages are mapped in what is hopefully a near-contiguous
// pattern, unmap every odd page to create invalid gaps.
for (i = 1; i < num_pages; i += 2)
uvm_mem_unmap_gpu_phys(mem[i], gpu);
// Access the even pages with physical ATS access where possible. This
// should pull in the invalid adjacent mappings too.
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&push,
"Write base 0x%llx to even pages",
pattern),
out);
for (i = 0; i < num_pages; i += 2) {
uvm_gpu_address_t addr = uvm_mem_gpu_address_physical(mem[i], gpu, 0, sizeof(pattern));
gpu->parent->ce_hal->memset_8(&push, addr, pattern + i, sizeof(pattern));
}
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
// Verify writes
for (i = 0; i < num_pages; i++) {
NvU64 expected = (i & 1) ? 0 : pattern + i;
NvU64 *cpu_ptr = uvm_mem_get_cpu_addr_kernel(mem[i]);
TEST_CHECK_GOTO(*cpu_ptr == expected, out);
}
// Re-map the odd pages. Hopefully they'll fall into their old addresses, or
// at least will overlap with GPU-prefetched translations from above.
for (i = 1; i < num_pages; i += 2)
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(mem[i], gpu), out);
// Access the odd pages
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&push,
"Write base 0x%llx to odd pages",
pattern),
out);
for (i = 1; i < num_pages; i += 2) {
uvm_gpu_address_t addr = uvm_mem_gpu_address_physical(mem[i], gpu, 0, sizeof(NvU64));
gpu->parent->ce_hal->memset_8(&push, addr, pattern + i, sizeof(NvU64));
}
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
// Verify writes
for (i = 0; i < num_pages; i++) {
NvU64 *cpu_ptr = uvm_mem_get_cpu_addr_kernel(mem[i]);
TEST_CHECK_GOTO(*cpu_ptr == pattern + i, out);
}
out:
for (i = 0; i < num_pages; i++)
uvm_mem_free(mem[i]);
uvm_kvfree(mem);
return status;
}
static NV_STATUS test_iommu(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
@@ -415,6 +509,8 @@ static NV_STATUS test_iommu(uvm_va_space_t *va_space)
TEST_NV_CHECK_RET(test_status);
TEST_NV_CHECK_RET(create_status);
TEST_NV_CHECK_RET(test_iommu_stale_invalid(gpu));
}
return NV_OK;
@@ -965,7 +1061,7 @@ release:
static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
{
unsigned num_tries;
unsigned max_num_tries = 20;
unsigned max_num_tries = 0x100;
unsigned num_rotations_completed = 0;
if (num_rotations == 0)

View File

@@ -328,7 +328,7 @@ typedef struct
} uvm_rm_user_object_t;
// Macro used to compare two values for types that support less than operator.
// It returns -1 if a < b, 1 if a > b and 0 if a == 0
// It returns -1 if a < b, 1 if a > b and 0 if a == b
#define UVM_CMP_DEFAULT(a,b) \
({ \
typeof(a) _a = a; \

View File

@@ -92,15 +92,57 @@ static uvm_gpu_t *dma_buffer_pool_to_gpu(uvm_conf_computing_dma_buffer_pool_t *d
return container_of(dma_buffer_pool, uvm_gpu_t, conf_computing.dma_buffer_pool);
}
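// Allocate the DMA stage buffer backing allocation (2MB pages, DMA-owned by
// the given GPU) and map it into both the CPU and GPU kernel address spaces.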
static NV_STATUS dma_buffer_alloc(uvm_gpu_t *gpu, uvm_conf_computing_dma_buffer_t *dma_buffer)
{
uvm_mem_alloc_params_t params = { 0 };
NV_STATUS status;
uvm_mem_t *mem;
BUILD_BUG_ON(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE % UVM_PAGE_SIZE_2M);
params.size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
params.page_size = UVM_PAGE_SIZE_2M;
params.dma_owner = gpu;
status = uvm_mem_alloc(&params, &mem);
if (status != NV_OK)
return status;
status = uvm_mem_map_cpu_kernel(mem);
if (status != NV_OK)
goto err;
status = uvm_mem_map_gpu_kernel(mem, gpu);
if (status != NV_OK)
goto err;
dma_buffer->alloc = mem;
return NV_OK;
err:
uvm_mem_free(mem);
return status;
}
#define AUTH_TAGS_SIZE ((UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE)
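// Allocate the per-page authentication tag buffer (AUTH_TAGS_SIZE bytes of
// DMA sysmem) and map it for both CPU kernel and GPU kernel access.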
static NV_STATUS auth_tag_alloc(uvm_gpu_t *gpu, uvm_conf_computing_dma_buffer_t *dma_buffer)
{
NV_STATUS status;
status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(AUTH_TAGS_SIZE, gpu, NULL, &(dma_buffer->auth_tag));
if (status != NV_OK)
return status;
return uvm_mem_map_gpu_kernel(dma_buffer->auth_tag, gpu);
}
// Allocate and map a new DMA stage buffer to CPU and GPU (VA)
static NV_STATUS dma_buffer_create(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
uvm_conf_computing_dma_buffer_t **dma_buffer_out)
{
uvm_gpu_t *dma_owner;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_mem_t *alloc = NULL;
NV_STATUS status = NV_OK;
size_t auth_tags_size = (UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
NV_STATUS status;
dma_buffer = uvm_kvmalloc_zero(sizeof(*dma_buffer));
if (!dma_buffer)
@@ -110,23 +152,11 @@ static NV_STATUS dma_buffer_create(uvm_conf_computing_dma_buffer_pool_t *dma_buf
uvm_tracker_init(&dma_buffer->tracker);
INIT_LIST_HEAD(&dma_buffer->node);
status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE, dma_owner, NULL, &alloc);
status = dma_buffer_alloc(dma_owner, dma_buffer);
if (status != NV_OK)
goto err;
dma_buffer->alloc = alloc;
status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
if (status != NV_OK)
goto err;
status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(auth_tags_size, dma_owner, NULL, &alloc);
if (status != NV_OK)
goto err;
dma_buffer->auth_tag = alloc;
status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
status = auth_tag_alloc(dma_owner, dma_buffer);
if (status != NV_OK)
goto err;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -98,7 +98,6 @@ typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_b
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
#endif //__UVM_FORWARD_DECL_H__

View File

@@ -28,6 +28,7 @@
#include "uvm_va_space.h"
#include "uvm_mmu.h"
#include "nv_uvm_types.h"
#include "nv_uvm_user_types.h"
#include "nv_uvm_interface.h"
#include "uvm_common.h"
@@ -39,13 +40,23 @@
size, \
ext_map_info))
// TODO: Bug 5310168: Expand test coverage of external mappings. The existing
// get_aperture() and is_cacheable() are only correct for the subset of external
// mappings that are currently tested.
static uvm_aperture_t get_aperture(uvm_va_space_t *va_space,
uvm_gpu_t *memory_owning_gpu,
uvm_gpu_t *memory_mapping_gpu,
UvmGpuMemoryInfo *memory_info,
bool sli_supported)
bool sli_supported,
UvmGpuExternalMappingInfo *ext_mapping_info)
{
if (memory_info->sysmem) {
// TODO: Bug 5310178: Do not use integrated GPU as a proxy for a fully
// coherent L2 cache.
if (memory_mapping_gpu->parent->is_integrated_gpu &&
ext_mapping_info->cachingType == UvmGpuCachingTypeForceCached)
return UVM_APERTURE_SYS_NON_COHERENT;
return UVM_APERTURE_SYS;
}
else {
@@ -57,9 +68,9 @@ static uvm_aperture_t get_aperture(uvm_va_space_t *va_space,
static bool is_cacheable(UvmGpuExternalMappingInfo *ext_mapping_info, uvm_aperture_t aperture)
{
if (ext_mapping_info->cachingType == UvmRmGpuCachingTypeForceCached)
if (ext_mapping_info->cachingType == UvmGpuCachingTypeForceCached)
return true;
else if (ext_mapping_info->cachingType == UvmRmGpuCachingTypeForceUncached)
else if (ext_mapping_info->cachingType == UvmGpuCachingTypeForceUncached)
return false;
else if (aperture == UVM_APERTURE_VID)
return true;
@@ -69,10 +80,10 @@ static bool is_cacheable(UvmGpuExternalMappingInfo *ext_mapping_info, uvm_apertu
static NvU32 get_protection(UvmGpuExternalMappingInfo *ext_mapping_info)
{
if (ext_mapping_info->mappingType == UvmRmGpuMappingTypeReadWriteAtomic ||
ext_mapping_info->mappingType == UvmRmGpuMappingTypeDefault)
if (ext_mapping_info->mappingType == UvmGpuMappingTypeReadWriteAtomic ||
ext_mapping_info->mappingType == UvmGpuMappingTypeDefault)
return UVM_PROT_READ_WRITE_ATOMIC;
else if (ext_mapping_info->mappingType == UvmRmGpuMappingTypeReadWrite)
else if (ext_mapping_info->mappingType == UvmGpuMappingTypeReadWrite)
return UVM_PROT_READ_WRITE;
else
return UVM_PROT_READ_ONLY;
@@ -119,7 +130,12 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
if (!memory_owning_gpu)
return NV_ERR_INVALID_DEVICE;
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
aperture = get_aperture(va_space,
memory_owning_gpu,
memory_mapping_gpu,
memory_info,
sli_supported,
ext_mapping_info);
if (is_cacheable(ext_mapping_info, aperture))
pte_flags |= UVM_MMU_PTE_FLAGS_CACHED;
@@ -228,8 +244,8 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
&memory_info,
false) == NV_OK, done);
ext_mapping_info.mappingType = UvmRmGpuMappingTypeReadWrite;
ext_mapping_info.cachingType = UvmRmGpuCachingTypeForceCached;
ext_mapping_info.mappingType = UvmGpuMappingTypeReadWrite;
ext_mapping_info.cachingType = UvmGpuCachingTypeForceCached;
TEST_CHECK_GOTO(get_rm_ptes(memory_info.pageSize, size - memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
@@ -239,8 +255,8 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
&memory_info,
false) == NV_OK, done);
ext_mapping_info.mappingType = UvmRmGpuMappingTypeReadOnly;
ext_mapping_info.cachingType = UvmRmGpuCachingTypeForceUncached;
ext_mapping_info.mappingType = UvmGpuMappingTypeReadOnly;
ext_mapping_info.cachingType = UvmGpuCachingTypeForceUncached;
TEST_CHECK_GOTO(get_rm_ptes(size - memory_info.pageSize, memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,

View File

@@ -111,9 +111,9 @@ struct uvm_global_struct
struct
{
// Indicates whether the system HW supports ATS. This field is set once
// during global initialization (uvm_global_init), and can be read
// afterwards without acquiring any locks.
// Indicates whether any GPU in the system supports PASID ATS. This
// field is set once during global initialization (uvm_global_init), and
// can be read afterwards without acquiring any locks.
bool supported;
// On top of HW platform support, ATS support can be overridden using

View File

@@ -42,8 +42,8 @@
#include "uvm_ats.h"
#include "uvm_test.h"
#include "uvm_conf_computing.h"
#include "uvm_linux.h"
#include "uvm_mmu.h"
#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
@@ -93,6 +93,8 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
parent_gpu->system_bus.link = get_gpu_link_type(gpu_info->sysmemLink);
UVM_ASSERT(parent_gpu->system_bus.link != UVM_GPU_LINK_INVALID);
parent_gpu->ats_supported = gpu_info->atsSupport;
parent_gpu->system_bus.link_rate_mbyte_per_s = gpu_info->sysmemLinkRateMBps;
if (gpu_info->systemMemoryWindowSize > 0) {
@@ -114,6 +116,15 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
}
parent_gpu->ats.non_pasid_ats_enabled = gpu_info->nonPasidAtsSupport;
// Non-PASID ATS cannot be disabled if present, but it implies PASID ATS
// support.
// TODO: Bug 5161018: Include integrated GPUs in the assert once they
// support PASID ATS.
if (UVM_ATS_SUPPORTED() && parent_gpu->ats.non_pasid_ats_enabled && gpu_info->systemMemoryWindowSize)
UVM_ASSERT(g_uvm_global.ats.supported);
uvm_uuid_string(uuid_buffer, &parent_gpu->uuid);
snprintf(parent_gpu->name,
sizeof(parent_gpu->name),
@@ -139,9 +150,13 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
gpu->mem_info.numa.enabled = true;
gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
gpu->mem_info.cdmm_enabled = false;
}
else {
UVM_ASSERT(!uvm_parent_gpu_is_coherent(gpu->parent));
// TODO: Bug 5273146: Use RM control call to detect CDMM mode.
if (uvm_parent_gpu_is_coherent(gpu->parent))
gpu->mem_info.cdmm_enabled = true;
gpu->mem_info.numa.node_id = NUMA_NO_NODE;
}
@@ -154,6 +169,10 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
// The buffer management thus remains the same: DMA mapped GPA addresses can
// be accessed by the GPU, while unmapped addresses can not and any access is
// blocked and potentially unrecoverable to the engine that made it.
//
// If non-PASID ATS is supported, then we can skip this because the hardware
// handles it for us without having to fake it with PASID ATS. This is only
// needed to enable the feature on other ATS systems.
static int gpu_get_internal_pasid(const uvm_gpu_t *gpu)
{
#if UVM_ATS_SVA_SUPPORTED() && defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT)
@@ -175,13 +194,14 @@ static int gpu_get_internal_pasid(const uvm_gpu_t *gpu)
#define UVM_INTERNAL_PASID 0
#endif
// Enable internal ATS only if ATS is enabled in general and we are using
// 64kB base page size. The base page size limitation is needed to avoid
// GH180 MMU behaviour which does not refetch invalid 4K ATS translations
// on access (see bug 3949400). This also works in virtualized environments
// because the entire 64kB guest page has to be mapped and pinned by the
// hypervisor for device access.
if (g_uvm_global.ats.enabled && PAGE_SIZE == UVM_PAGE_SIZE_64K) {
// Enable internal ATS only if non-PASID ATS is disabled, but PASID ATS is
// enabled, and we are using 64kB base page size. The base page size
// limitation is needed to avoid GH180 MMU behaviour which does not refetch
// invalid 4K ATS translations on access (see bug 3949400). This also works
// in virtualized environments because the entire 64kB guest page has to be
// mapped and pinned by the hypervisor for device access.
if (!gpu->parent->ats.non_pasid_ats_enabled && g_uvm_global.ats.enabled &&
uvm_parent_gpu_supports_ats(gpu->parent) && PAGE_SIZE == UVM_PAGE_SIZE_64K) {
struct device *dev = &gpu->parent->pci_dev->dev;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -252,6 +272,7 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
gpu->mem_info.static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
gpu->mem_info.static_bar1_size = fb_info.staticBar1Size;
gpu->mem_info.static_bar1_write_combined = fb_info.bStaticBar1WriteCombined;
return NV_OK;
}
@@ -1299,6 +1320,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
return status;
}
// The aperture here refers to sysmem, which only uses UVM_APERTURE_SYS
if (tree_alloc->addr.aperture == UVM_APERTURE_SYS)
gpu->address_space_tree.pdb_rm_dma_address = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_address);
@@ -1385,6 +1407,7 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu)
return NV_OK;
}
static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
@@ -1451,6 +1474,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
uvm_hal_init_properties(parent_gpu);
// On SoCs which are not NUMA aware, use the Linux kernel's fake NUMA node.
if (parent_gpu->is_integrated_gpu && parent_gpu->closest_cpu_numa_node == -1) {
UVM_ASSERT(num_possible_nodes() == 1);
parent_gpu->closest_cpu_numa_node = numa_mem_id();
}
UVM_ASSERT(!parent_gpu->rm_info.smcEnabled || parent_gpu->smc.supported);
parent_gpu->smc.enabled = !!parent_gpu->rm_info.smcEnabled;
@@ -1568,12 +1597,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
if (status != NV_OK) {
UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
uvm_pmm_gpu_device_p2p_init(gpu);
status = init_semaphore_pools(gpu);
@@ -1765,8 +1788,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
uvm_pmm_gpu_device_p2p_deinit(gpu);
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
uvm_pmm_gpu_deinit(&gpu->pmm);
if (gpu->rm_address_space != 0)
@@ -1863,7 +1884,9 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
++parent_gpu->stats.num_replayable_faults;
}
static void update_stats_fault_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
static void update_stats_fault_cb(uvm_va_space_t *va_space,
uvm_perf_event_t event_id,
uvm_perf_event_data_t *event_data)
{
uvm_parent_gpu_t *parent_gpu;
const uvm_fault_buffer_entry_t *fault_entry, *fault_instance;
@@ -1888,7 +1911,9 @@ static void update_stats_fault_cb(uvm_perf_event_t event_id, uvm_perf_event_data
update_stats_parent_gpu_fault_instance(parent_gpu, fault_instance, event_data->fault.gpu.is_duplicate);
}
static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
static void update_stats_migration_cb(uvm_va_space_t *va_space,
uvm_perf_event_t event_id,
uvm_perf_event_data_t *event_data)
{
uvm_gpu_t *gpu_dst = NULL;
uvm_gpu_t *gpu_src = NULL;
@@ -1910,11 +1935,11 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
// Page prefetching is also triggered by faults
is_replayable_fault =
event_data->migration.make_resident_context->cause == UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT;
event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT;
is_non_replayable_fault =
event_data->migration.make_resident_context->cause == UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT;
event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT;
is_access_counter =
event_data->migration.make_resident_context->cause == UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
pages = event_data->migration.bytes / PAGE_SIZE;
UVM_ASSERT(event_data->migration.bytes % PAGE_SIZE == 0);
@@ -1929,7 +1954,7 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
}
}
@@ -1942,7 +1967,7 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
}
}
@@ -3169,6 +3194,7 @@ uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu)
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent) || g_uvm_global.conf_computing_enabled)
return UVM_APERTURE_VID;
// Sysmem is represented by UVM_APERTURE_SYS
if (!gpu->mem_info.size)
return UVM_APERTURE_SYS;
@@ -3637,41 +3663,50 @@ NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dm
return gpu_addr;
}
static void *parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
static void *parent_gpu_dma_alloc(NvU64 size, uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
{
NvU64 dma_addr;
void *cpu_addr;
cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, &dma_addr, gfp_flags);
cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, size, &dma_addr, gfp_flags);
if (!cpu_addr)
return cpu_addr;
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
atomic64_add(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
return cpu_addr;
}
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
NV_STATUS uvm_gpu_dma_alloc(NvU64 size, uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
{
void *cpu_addr = parent_gpu_dma_alloc_page(gpu->parent, gfp_flags, dma_address_out);
NV_STATUS status;
void *cpu_addr = parent_gpu_dma_alloc(size, gpu->parent, gfp_flags, dma_address_out);
if (!cpu_addr)
return NV_ERR_NO_MEMORY;
// TODO: Bug 4868590: Issue GPA invalidate here
// Some GPUs require TLB invalidation on IOMMU invalid -> valid transitions
status = uvm_mmu_tlb_invalidate_phys(gpu);
if (status != NV_OK) {
uvm_parent_gpu_dma_free(size, gpu->parent, cpu_addr, *dma_address_out);
return status;
}
*cpu_addr_out = cpu_addr;
return NV_OK;
}
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
void uvm_parent_gpu_dma_free(NvU64 size, uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
{
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, cpu_addr, dma_address);
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
dma_free_coherent(&parent_gpu->pci_dev->dev, size, cpu_addr, dma_address);
atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
}
static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
NV_STATUS uvm_gpu_map_cpu_pages_no_invalidate(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
// This function only actually needs the parent GPU, but it takes in the
// sub GPU for API symmetry with uvm_gpu_map_cpu_pages().
uvm_parent_gpu_t *parent_gpu = gpu->parent;
NvU64 dma_addr;
UVM_ASSERT(PAGE_ALIGNED(size));
@@ -3700,9 +3735,15 @@ static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct p
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NV_STATUS status = parent_gpu_map_cpu_pages(gpu->parent, page, size, dma_address_out);
NV_STATUS status = uvm_gpu_map_cpu_pages_no_invalidate(gpu, page, size, dma_address_out);
if (status != NV_OK)
return status;
// TODO: Bug 4868590: Issue GPA invalidate here
// Some GPUs require TLB invalidation on IOMMU invalid -> valid
// transitions.
status = uvm_mmu_tlb_invalidate_phys(gpu);
if (status != NV_OK)
uvm_parent_gpu_unmap_cpu_pages(gpu->parent, *dma_address_out, size);
return status;
}
@@ -3800,7 +3841,7 @@ NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARA
return NV_ERR_INVALID_DEVICE;
}
if (uvm_va_space_pageable_mem_access_supported(va_space) && gpu->parent->replayable_faults_supported)
if (uvm_va_space_pageable_mem_access_enabled(va_space) && gpu->parent->replayable_faults_supported)
params->pageableMemAccess = NV_TRUE;
else
params->pageableMemAccess = NV_FALSE;

View File

@@ -28,6 +28,7 @@
#include "nvmisc.h"
#include "uvm_types.h"
#include "nv_uvm_types.h"
#include "nv_uvm_user_types.h"
#include "uvm_linux.h"
#include "nv-kref.h"
#include "uvm_common.h"
@@ -237,6 +238,8 @@ typedef struct
// aligned region of a SAM VMA.
uvm_page_mask_t migrated_mask;
// Access counters notification buffer index.
NvU32 buffer_index;
} access_counters;
};
@@ -708,11 +711,21 @@ struct uvm_gpu_struct
int node_id;
} numa;
// Coherent Driver-based Memory Management (CDMM) is a mode in which
// coherent GPU memory is managed by the driver rather than the OS: the
// driver does not online the memory as NUMA nodes. Keeping this field
// here provides the most flexibility and stays in sync with the numa
// properties above. CDMM as a property applies to the entire system.
bool cdmm_enabled;
// Physical address of the start of statically mapped fb memory in BAR1
NvU64 static_bar1_start;
// Size of statically mapped fb memory in BAR1.
NvU64 static_bar1_size;
// Whether or not RM has iomapped the region as write-combined.
NvBool static_bar1_write_combined;
} mem_info;
struct
@@ -815,14 +828,6 @@ struct uvm_gpu_struct
uvm_bit_locks_t bitlocks;
} sysmem_mappings;
// Reverse lookup table used to query the user mapping associated with a
// sysmem (DMA) physical address.
//
// The system memory mapping information referred to by this field is
// different from that of sysmem_mappings, because it relates to user
// mappings (instead of kernel), and it is used in most configurations.
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
struct
{
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@@ -1064,11 +1069,6 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@@ -1095,10 +1095,6 @@ struct uvm_parent_gpu_struct
bool plc_supported;
// If true, page_tree initialization pre-populates no_ats_ranges. It only
// affects ATS systems.
bool no_ats_range_required;
// Parameters used by the TLB batching API
struct
{
@@ -1317,6 +1313,22 @@ struct uvm_parent_gpu_struct
NvU64 memory_window_end;
} system_bus;
struct
{
// TODO: Bug 5013952: Add per-GPU PASID ATS fields in addition to the
// global ones.
// Whether this GPU uses non-PASID ATS (aka serial ATS) to translate
// IOVAs to SPAs.
bool non_pasid_ats_enabled : 1;
// If true, page_tree initialization pre-populates no_ats_ranges. It
// only affects ATS systems.
bool no_ats_range_required : 1;
// See the comments on uvm_dma_map_invalidation_t
uvm_dma_map_invalidation_t dma_map_invalidation;
// WAR to issue ATS TLB invalidation commands ourselves.
struct
{
@@ -1326,6 +1338,7 @@ struct uvm_parent_gpu_struct
unsigned long smmu_prod;
unsigned long smmu_cons;
} smmu_war;
} ats;
struct
{
@@ -1344,6 +1357,9 @@ struct uvm_parent_gpu_struct
} egm;
uvm_test_parent_gpu_inject_error_t test;
// PASID ATS
bool ats_supported;
};
NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr);
@@ -1693,6 +1709,13 @@ NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
// lifetime of that parent.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// Like uvm_gpu_map_cpu_pages(), but skips issuing any GPU TLB invalidates
// required by the architecture for invalid -> valid IOMMU transitions. It is
// the caller's responsibility to perform those invalidates before accessing the
// mappings, such as with uvm_mmu_tlb_invalidate_phys() or
// uvm_hal_tlb_invalidate_phys().
NV_STATUS uvm_gpu_map_cpu_pages_no_invalidate(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
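// Illustrative sketch only: a hypothetical helper (not part of the driver)
// showing the intended use of the _no_invalidate variant: create several
// mappings, then issue a single uvm_mmu_tlb_invalidate_phys() before any of
// them are accessed. Cleanup of partially-created mappings on error is
// omitted for brevity.
static NV_STATUS map_cpu_pages_batched(uvm_gpu_t *gpu,
                                       struct page **pages,
                                       size_t count,
                                       NvU64 *dma_addresses_out)
{
    size_t i;
    NV_STATUS status;
    for (i = 0; i < count; i++) {
        status = uvm_gpu_map_cpu_pages_no_invalidate(gpu, pages[i], PAGE_SIZE, &dma_addresses_out[i]);
        if (status != NV_OK)
            return status;
    }
    // One invalidate covers all of the invalid -> valid IOMMU transitions above.
    return uvm_mmu_tlb_invalidate_phys(gpu);
}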
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
@@ -1706,18 +1729,18 @@ static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dm
uvm_parent_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
}
// Allocate and map a page of system DMA memory on the GPU for physical access
// Allocate and map system DMA memory on the GPU for physical access
//
// Returns
// - the address of allocated memory in CPU virtual address space.
// - the address of the page that can be used to access them on
// - the address of the page(s) that can be used to access them on
// the GPU in the dma_address_out parameter. This address is usable by any GPU
// under the same parent for the lifetime of that parent.
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out);
NV_STATUS uvm_gpu_dma_alloc(NvU64 size, uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out);
// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_gpu_dma_alloc().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address);
void uvm_parent_gpu_dma_free(NvU64 size, uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address);
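// Illustrative sketch only: a hypothetical call site (not part of the driver)
// allocating two pages of DMA-coherent sysmem for a GPU and freeing them with
// the matching size. Assumes the usual uvm_gpu.h / uvm_linux.h definitions.
static NV_STATUS dma_alloc_example(uvm_gpu_t *gpu)
{
    void *cpu_addr;
    NvU64 dma_addr;
    const NvU64 size = 2 * PAGE_SIZE;
    NV_STATUS status = uvm_gpu_dma_alloc(size, gpu, GFP_KERNEL, &cpu_addr, &dma_addr);
    if (status != NV_OK)
        return status;
    // cpu_addr is usable on the CPU; dma_addr is usable by any GPU under
    // gpu->parent for the lifetime of that parent.
    uvm_parent_gpu_dma_free(size, gpu->parent, cpu_addr, dma_addr);
    return NV_OK;
}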
// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1747,6 +1770,11 @@ static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
}
static bool uvm_parent_gpu_supports_ats(const uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->ats_supported;
}
static bool uvm_parent_gpu_needs_pushbuffer_segments(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->max_host_va > (1ull << 40);

View File

@@ -1489,6 +1489,8 @@ static NV_STATUS service_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
ats_context->access_counters.buffer_index = access_counters->index;
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
// location is set
// If no pages were actually migrated, don't clear the access counters.
@@ -1574,8 +1576,21 @@ static NV_STATUS service_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
}
}
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
if (!gpu_va_space->gpu->mem_info.cdmm_enabled) {
status = service_notification_ats(gpu_va_space, mm, access_counters, index, out_index);
}
else {
status = notify_tools_and_process_flags(va_space,
gpu_va_space->gpu,
access_counters,
0,
batch_context->notifications,
1,
0,
NULL);
*out_index = index + 1;
}
}
else {
NvU32 flags;
uvm_va_block_t *va_block = NULL;
@@ -1811,8 +1826,9 @@ static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIG
if (config_granularity_to_bytes(params->granularity, &tracking_size) != NV_OK)
return NV_ERR_INVALID_ARGUMENT;
// Since values for granularity are shared between tests and nv_uvm_types.h,
// the value will be checked in the call to nvUvmInterfaceEnableAccessCntr
// Since values for granularity are shared between tests and
// nv_uvm_user_types.h, the value will be checked in the call to
// nvUvmInterfaceEnableAccessCntr
config->granularity = params->granularity;
config->threshold = params->threshold;

View File

@@ -287,14 +287,14 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid)
static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int node)
{
#if UVM_THREAD_AFFINITY_SUPPORTED()
if (node != -1 && !cpumask_empty(uvm_cpumask_of_node(node))) {
if (node != -1 && !cpumask_empty(cpumask_of_node(node))) {
NV_STATUS status;
status = errno_to_nv_status(nv_kthread_q_init_on_node(queue, name, node));
if (status != NV_OK)
return status;
return errno_to_nv_status(set_cpus_allowed_ptr(queue->q_kthread, uvm_cpumask_of_node(node)));
return errno_to_nv_status(set_cpus_allowed_ptr(queue->q_kthread, cpumask_of_node(node)));
}
#endif

View File

@@ -28,6 +28,7 @@
#include "uvm_lock.h"
#include "uvm_rm_mem.h"
#include "uvm_linux.h"
#include "nv_uvm_types.h"
typedef NvU32 uvm_gpu_semaphore_notifier_t;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -208,7 +208,9 @@ static uvm_hal_class_ops_t host_table[] =
.write_gpu_put = uvm_hal_maxwell_host_write_gpu_put,
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f,
.tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va,
.tlb_invalidate_phys = uvm_hal_maxwell_host_tlb_invalidate_phys_unsupported,
.tlb_invalidate_test = uvm_hal_maxwell_host_tlb_invalidate_test,
.tlb_flush_prefetch = uvm_hal_maxwell_host_tlb_flush_prefetch_unsupported,
.replay_faults = uvm_hal_maxwell_replay_faults_unsupported,
.cancel_faults_global = uvm_hal_maxwell_cancel_faults_global_unsupported,
.cancel_faults_targeted = uvm_hal_maxwell_cancel_faults_targeted_unsupported,
@@ -219,6 +221,7 @@ static uvm_hal_class_ops_t host_table[] =
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
.l2_invalidate_noncoh_sysmem = uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
},
@@ -309,7 +312,10 @@ static uvm_hal_class_ops_t host_table[] =
.u.host_ops = {
.tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va,
.tlb_invalidate_phys = uvm_hal_blackwell_host_tlb_invalidate_phys,
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
.tlb_flush_prefetch = uvm_hal_blackwell_host_tlb_flush_prefetch,
.l2_invalidate_noncoh_sysmem = uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem,
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
}
},
@@ -905,12 +911,6 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
// TODO: Bug 200692962: Add support for access counters in vGPU
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
parent_gpu->access_counters_supported = false;
// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
parent_gpu->access_counters_can_use_physical_addresses = false;
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -977,9 +977,28 @@ uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
return UVM_MEMBAR_SYS;
}
void uvm_hal_tlb_invalidate_phys(uvm_push_t *push, uvm_dma_map_invalidation_t inval_type)
{
uvm_parent_gpu_t *parent = uvm_push_get_gpu(push)->parent;
switch (inval_type) {
case UVM_DMA_MAP_INVALIDATION_FLUSH:
parent->host_hal->tlb_flush_prefetch(push);
break;
case UVM_DMA_MAP_INVALIDATION_FULL:
parent->host_hal->tlb_invalidate_phys(push);
break;
default:
UVM_ASSERT(inval_type == UVM_DMA_MAP_INVALIDATION_NONE);
break;
}
}
const char *uvm_aperture_string(uvm_aperture_t aperture)
{
BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
BUILD_BUG_ON(UVM_APERTURE_MAX != 13);
switch (aperture) {
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_0);
@@ -992,6 +1011,7 @@ const char *uvm_aperture_string(uvm_aperture_t aperture)
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_7);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_MAX);
UVM_ENUM_STRING_CASE(UVM_APERTURE_SYS);
UVM_ENUM_STRING_CASE(UVM_APERTURE_SYS_NON_COHERENT);
UVM_ENUM_STRING_CASE(UVM_APERTURE_VID);
UVM_ENUM_STRING_CASE(UVM_APERTURE_DEFAULT);
UVM_ENUM_STRING_DEFAULT();
@@ -1121,6 +1141,18 @@ void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_e
UVM_DBG_PRINT(" tag %x\n", entry->tag);
}
const char *uvm_dma_map_invalidation_string(uvm_dma_map_invalidation_t inval_type)
{
BUILD_BUG_ON(UVM_DMA_MAP_INVALIDATION_COUNT != 3);
switch (inval_type) {
UVM_ENUM_STRING_CASE(UVM_DMA_MAP_INVALIDATION_NONE);
UVM_ENUM_STRING_CASE(UVM_DMA_MAP_INVALIDATION_FLUSH);
UVM_ENUM_STRING_CASE(UVM_DMA_MAP_INVALIDATION_FULL);
UVM_ENUM_STRING_DEFAULT();
}
}
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
return true;
@@ -1129,3 +1161,11 @@ bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
{
}
void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n",
uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
UVM_ASSERT_MSG(false, "host l2_invalidate_noncoh_sysmem called on unsupported GPU\n");
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -209,6 +209,15 @@ void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
NvU64 page_size,
uvm_membar_t membar);
// Issue a TLB invalidate applying to cached physical translations (GPAs), such
// as those used by ATS for physical CE transfer. No membar is performed.
typedef void (*uvm_hal_host_tlb_invalidate_phys_t)(uvm_push_t *push);
// Volta was the first GPU which enabled caching of physical translations, but
// we have no need to invalidate manually until Blackwell.
void uvm_hal_maxwell_host_tlb_invalidate_phys_unsupported(uvm_push_t *push);
void uvm_hal_blackwell_host_tlb_invalidate_phys(uvm_push_t *push);
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
@@ -231,6 +240,20 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
// Flush out pending physical MMU prefetches to guarantee that no new
// translations can observe them.
typedef void (*uvm_hal_host_tlb_flush_prefetch_t)(uvm_push_t *push);
// Blackwell is the first GPU which needs prefetch flushing
void uvm_hal_maxwell_host_tlb_flush_prefetch_unsupported(uvm_push_t *push);
void uvm_hal_blackwell_host_tlb_flush_prefetch(uvm_push_t *push);
// L2 cache invalidate for non-coherent sysmem on systems with a write-back cache.
// As of now, these are iGPUs.
typedef void (*uvm_hal_host_l2_invalidate_noncoh_sysmem_t)(uvm_push_t *push);
void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push);
void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push);
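// Illustrative sketch only: a hypothetical wrapper (not part of the driver)
// invoking the per-GPU host HAL hook, e.g. before an iGPU re-reads sysmem
// lines it may have cached non-coherently on a write-back-cache system.
static void push_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
{
    uvm_push_get_gpu(push)->parent->host_hal->l2_invalidate_noncoh_sysmem(push);
}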
// By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
// uvm_push_set_flag().
@@ -796,7 +819,10 @@ struct uvm_host_hal_struct
uvm_hal_host_write_gpu_put_t write_gpu_put;
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
uvm_hal_host_tlb_invalidate_phys_t tlb_invalidate_phys;
uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
uvm_hal_host_tlb_flush_prefetch_t tlb_flush_prefetch;
uvm_hal_host_l2_invalidate_noncoh_sysmem_t l2_invalidate_noncoh_sysmem;
uvm_hal_fault_buffer_replay_t replay_faults;
uvm_hal_fault_cancel_global_t cancel_faults_global;
uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
@@ -960,4 +986,8 @@ bool uvm_hal_membar_before_semaphore(uvm_push_t *push);
// the local GPU's memory.
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem);
// Perform the requested physical GPU TLB invalidation for cached IOMMU
// mappings. See uvm_dma_map_invalidation_t. No membar is performed.
void uvm_hal_tlb_invalidate_phys(uvm_push_t *push, uvm_dma_map_invalidation_t inval_type);
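// Illustrative sketch only: a hypothetical call site (not part of the driver)
// issuing, inside an open push, whatever physical TLB invalidation the parent
// GPU's DMA-map policy requires after new IOMMU mappings were created.
static void tlb_invalidate_phys_for_new_mappings(uvm_push_t *push)
{
    uvm_parent_gpu_t *parent_gpu = uvm_push_get_gpu(push)->parent;
    uvm_hal_tlb_invalidate_phys(push, parent_gpu->ats.dma_map_invalidation);
}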
#endif // __UVM_HAL_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -41,7 +41,48 @@ typedef enum
UVM_APERTURE_PEER_6,
UVM_APERTURE_PEER_7,
UVM_APERTURE_PEER_MAX,
// SYS aperture is used for memory addresses in system physical address
// space, or device DMA space. This is currently used for system memory
// and certain types of peer memory.
// UVM always uses UVM_APERTURE_SYS to access system memory so the value
// is also used to denote 'sysmem' as a location (e.g. for page tables)
// It is directly encoded as SYS_COH in PTEs and CE/esched methods.
UVM_APERTURE_SYS,
// On platforms that support the GPU coherently caching system memory,
// SYS_NON_COHERENT prevents other clients from snooping the GPU L2 cache.
// This allows noncoherent caching of system memory by GPUs on these
// platforms. UVM does not use SYS_NON_COHERENT for its internal mappings,
// however it may be used by UvmMapExternalAllocation() for external
// mappings. On fully coherent platforms, SYS_NON_COHERENT will be used in
// the following cases:
//
// +----------------------------------------------------------+------------+-------------+
// | RM Allocation Settings / UvmMapExternalAllocation() Flag | Default | ForceCached |
// +----------------------------------------------------------+------------+-------------+
// | GPU_CACHE_SNOOP_DISABLE + GPU CACHED | SYS_NONCOH | SYS_NONCOH |
// | GPU_CACHE_SNOOPABLE_DEFER_TO_MAP + GPU CACHED | SYS_NONCOH | SYS_NONCOH |
// | GPU_CACHE_SNOOP_DISABLE + GPU UNCACHED | - | SYS_NONCOH |
// | GPU_CACHE_SNOOPABLE_DEFER_TO_MAP + GPU UNCACHED | - | SYS_NONCOH |
// +----------------------------------------------------------+------------+-------------+
//
// The implementation of UvmMapExternalAllocation() relies on RM to
// actually generate PTEs (see nvUvmInterfaceGetExternalAllocPtes()) which
// means that UVM does not create these SYS_NON_COHERENT entries itself.
//
// However, in the kernel_driver_get_rm_ptes test, UVM creates
// SYS_NON_COHERENT PTEs to verify that RM is using the expected aperture on
// fully coherent platforms. These test PTEs are never actually used for
// translation.
//
// SYS_NON_COHERENT aperture is never used to denote a location and its use
// is more limited than the SYS aperture above.
UVM_APERTURE_SYS_NON_COHERENT,
// VID aperture is used for memory accesses in the GPU's local vidmem.
// It's the only aperture used for local vidmem, so it's also used to
// denote vidmem location (e.g. for page tables)
UVM_APERTURE_VID,
// DEFAULT is a special value to let MMU pick the location of page tables
@@ -536,4 +577,24 @@ static uvm_prot_t uvm_fault_access_type_to_prot(uvm_fault_access_type_t access_t
}
}
// Indicates the type of GPU physical TLB invalidation required when
// transitioning an IOMMU GPA mapping from invalid to valid (dma_map_page() and
// friends).
typedef enum
{
UVM_DMA_MAP_INVALIDATION_NONE = 0,
// No actual invalidation of lines is required. A dummy invalidation is
// needed to flush out non-fatal prefetch accesses which may have occurred
// while the mapping was still invalid.
UVM_DMA_MAP_INVALIDATION_FLUSH,
// Invalidate all matching GPA lines
UVM_DMA_MAP_INVALIDATION_FULL,
UVM_DMA_MAP_INVALIDATION_COUNT
} uvm_dma_map_invalidation_t;
const char *uvm_dma_map_invalidation_string(uvm_dma_map_invalidation_t inval_type);
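// Illustrative sketch only: a hypothetical debug helper (not part of the
// driver) printing the DMA-map invalidation policy selected for a parent GPU.
// Assumes uvm_gpu.h and uvm_common.h are available for uvm_parent_gpu_t and
// UVM_DBG_PRINT.
static void print_dma_map_invalidation(uvm_parent_gpu_t *parent_gpu)
{
    UVM_DBG_PRINT("DMA map invalidation type: %s\n",
                  uvm_dma_map_invalidation_string(parent_gpu->ats.dma_map_invalidation));
}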
#endif // __UVM_HAL_TYPES_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -131,6 +131,8 @@ typedef struct
bool uvm_hmm_is_enabled_system_wide(void)
{
// TODO: Bug 4103580: Once aarch64 supports HMM, this condition will no
// longer be true.
if (uvm_disable_hmm)
return false;
@@ -207,6 +209,7 @@ static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_pa
PAGE_SIZE);
uvm_push_end(&push);
status = uvm_tracker_add_push_safe(&tracker, &push);
if (status == NV_OK)
status = uvm_tracker_wait_deinit(&tracker);
@@ -248,6 +251,7 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
// in data loss in the application but failures are not expected.
if (hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn)) != NV_OK)
memzero_page(dst_page, 0, PAGE_SIZE);
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
migrate_device_pages(&src_pfn, &dst_pfn, 1);
}
@@ -270,6 +274,7 @@ void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
uvm_range_tree_init(&hmm_va_space->blocks);
uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF);
atomic64_set(&hmm_va_space->allocated_page_count, 0);
return;
}
@@ -349,6 +354,11 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
for (pfn = __phys_to_pfn(devmem_start); pfn <= __phys_to_pfn(devmem_end); pfn++) {
struct page *page = pfn_to_page(pfn);
// No need to keep scanning if no HMM pages are allocated for this
// va_space.
if (!atomic64_read(&va_space->hmm.allocated_page_count))
break;
UVM_ASSERT(is_device_private_page(page));
// This check is racy because nothing stops the page being freed and
@@ -946,7 +956,6 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
uvm_va_space_t *va_space;
struct mm_struct *mm;
struct vm_area_struct *vma;
uvm_va_block_region_t region;
NvU64 addr, from, to;
uvm_va_block_t *new;
NV_STATUS status = NV_OK;
@@ -988,7 +997,6 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
from = max(addr, (NvU64)vma->vm_start);
to = min(va_block->end, (NvU64)vma->vm_end - 1);
region = uvm_va_block_region_from_start_end(va_block, from, to);
if (!uvm_hmm_vma_is_valid(vma, from, false))
continue;
@@ -1603,7 +1611,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
return status;
}
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
uvm_cpu_chunk_free(chunk);
@@ -1998,6 +2006,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,
UVM_ASSERT(!page_count(dpage));
zone_device_page_init(dpage);
dpage->zone_device_data = gpu_chunk;
atomic64_inc(&va_block->hmm.va_space->hmm.allocated_page_count);
dst_pfns[page_index] = migrate_pfn(pfn);
}
@@ -2063,15 +2072,14 @@ static NV_STATUS alloc_page_on_cpu(uvm_va_block_t *va_block,
// TODO: Bug 3368756: add support for transparent huge pages
// Support for large CPU pages means the page_index may need fixing
dst_page = migrate_pfn_to_page(block_context->hmm.dst_pfns[page_index]);
// Note that we don't call get_page(dst_page) since alloc_page_vma()
// returns with a page reference count of one and we are passing
// ownership to Linux. Also, uvm_va_block_cpu_page_populate() recorded
// the page as "mirrored" so that migrate_vma_finalize() and
// hmm_va_block_cpu_page_unpopulate() don't double free the page.
dst_pfns[page_index] = block_context->hmm.dst_pfns[page_index];
dst_page = migrate_pfn_to_page(dst_pfns[page_index]);
lock_page(dst_page);
dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page));
return NV_OK;
}
@@ -2159,6 +2167,13 @@ static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block,
}
status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, block_context);
if (status != NV_OK) {
// Try to migrate other pages if we can't allocate this one.
if (status != NV_ERR_NO_MEMORY)
break;
uvm_page_mask_clear(page_mask, page_index);
}
}
if (status != NV_OK)
@@ -2218,8 +2233,6 @@ static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_cont
{
uvm_processor_id_t processor_id;
uvm_service_block_context_t *service_context;
uvm_perf_prefetch_hint_t *prefetch_hint;
uvm_va_block_retry_t *va_block_retry;
const unsigned long *src_pfns;
unsigned long *dst_pfns;
uvm_page_mask_t *page_mask;
@@ -2230,9 +2243,7 @@ static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_cont
processor_id = devmem_fault_context->processor_id;
service_context = devmem_fault_context->service_context;
prefetch_hint = &service_context->prefetch_hint;
va_block = devmem_fault_context->va_block;
va_block_retry = devmem_fault_context->va_block_retry;
src_pfns = service_context->block_context->hmm.src_pfns;
dst_pfns = service_context->block_context->hmm.dst_pfns;
region = service_context->region;
@@ -2837,7 +2848,6 @@ static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma,
uvm_va_block_t *va_block;
uvm_va_block_retry_t *va_block_retry;
uvm_service_block_context_t *service_context;
uvm_perf_prefetch_hint_t *prefetch_hint;
const unsigned long *src_pfns;
unsigned long *dst_pfns;
uvm_va_block_region_t region;
@@ -2850,7 +2860,6 @@ static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma,
va_block_retry = uvm_hmm_gpu_fault_event->va_block_retry;
service_context = uvm_hmm_gpu_fault_event->service_context;
region = service_context->region;
prefetch_hint = &service_context->prefetch_hint;
src_pfns = service_context->block_context->hmm.src_pfns;
dst_pfns = service_context->block_context->hmm.dst_pfns;
@@ -2896,7 +2905,6 @@ static NV_STATUS uvm_hmm_gpu_fault_finalize_and_map(uvm_hmm_gpu_fault_event_t *u
uvm_processor_id_t processor_id;
uvm_processor_id_t new_residency;
uvm_va_block_t *va_block;
uvm_va_block_retry_t *va_block_retry;
uvm_service_block_context_t *service_context;
const unsigned long *src_pfns;
unsigned long *dst_pfns;
@@ -2908,7 +2916,6 @@ static NV_STATUS uvm_hmm_gpu_fault_finalize_and_map(uvm_hmm_gpu_fault_event_t *u
processor_id = uvm_hmm_gpu_fault_event->processor_id;
new_residency = uvm_hmm_gpu_fault_event->new_residency;
va_block = uvm_hmm_gpu_fault_event->va_block;
va_block_retry = uvm_hmm_gpu_fault_event->va_block_retry;
service_context = uvm_hmm_gpu_fault_event->service_context;
src_pfns = service_context->block_context->hmm.src_pfns;
dst_pfns = service_context->block_context->hmm.dst_pfns;
@@ -3117,7 +3124,6 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migrate_event)
{
uvm_va_block_t *va_block;
uvm_va_block_retry_t *va_block_retry;
uvm_va_block_context_t *va_block_context;
uvm_va_block_region_t region;
uvm_processor_id_t dest_id;
@@ -3127,7 +3133,6 @@ static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migra
unsigned long *dst_pfns;
va_block = uvm_hmm_migrate_event->va_block;
va_block_retry = uvm_hmm_migrate_event->va_block_retry;
va_block_context = uvm_hmm_migrate_event->service_context->block_context;
region = uvm_hmm_migrate_event->region;
dest_id = uvm_hmm_migrate_event->dest_id;
@@ -3375,6 +3380,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
.region = region,
.dest_id = UVM_ID_CPU,
.cause = cause,
.same_devmem_page_mask = {},
};
uvm_page_mask_t *page_mask = &uvm_hmm_migrate_event.page_mask;
const uvm_va_policy_t *policy;
@@ -3391,7 +3397,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
// Note that there is no VMA available when evicting HMM pages.
va_block_context->hmm.vma = NULL;
uvm_page_mask_copy(page_mask, pages_to_evict);
uvm_page_mask_init_from_region(page_mask, region, pages_to_evict);
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
npages = uvm_va_block_region_num_pages(region);
@@ -3404,7 +3410,13 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
// TODO: Bug 3660922: Need to handle read duplication at some point.
UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region));
status = migrate_alloc_on_cpu(va_block, src_pfns, dst_pfns, region, page_mask, NULL, va_block_context);
status = migrate_alloc_on_cpu(va_block,
src_pfns,
dst_pfns,
region,
page_mask,
&uvm_hmm_migrate_event.same_devmem_page_mask,
va_block_context);
if (status != NV_OK)
goto err;
@@ -3430,6 +3442,13 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
migrate_device_finalize(src_pfns + region.first, dst_pfns + region.first, npages);
}
// TODO: Bug 5167764: Evictions can't handle partial migrations.
uvm_page_mask_init_from_region(&va_block_context->scratch_page_mask, region, pages_to_evict);
if (uvm_page_mask_andnot(&va_block_context->scratch_page_mask,
&va_block_context->scratch_page_mask,
page_mask))
return NV_WARN_MORE_PROCESSING_REQUIRED;
return NV_OK;
err:
@@ -3506,7 +3525,7 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
args.vma = vmf->vma;
args.src = &src_pfn;
args.dst = &dst_pfn;
args.start = nv_page_fault_va(vmf);
args.start = vmf->address;
args.end = args.start + PAGE_SIZE;
args.pgmap_owner = &g_uvm_global;
args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
@@ -3540,6 +3559,7 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
out:
if (status == NV_OK)
migrate_vma_pages(&args);
migrate_vma_finalize(&args);
return status;
@@ -3669,7 +3689,7 @@ NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
uvm_mutex_unlock(&va_block->lock);
uvm_hmm_migrate_finish(va_block);
return NV_OK;
return status;
}
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params, struct file *filp)

View File

@@ -37,6 +37,7 @@ typedef struct
// This stores pointers to uvm_va_block_t for HMM blocks.
uvm_range_tree_t blocks;
uvm_mutex_t blocks_lock;
atomic64_t allocated_page_count;
} uvm_hmm_va_space_t;
#if UVM_IS_CONFIG_HMM()

Some files were not shown because too many files have changed in this diff.