Compare commits


3 Commits

Author        SHA1        Message     Date
Maneet Singh  2ccbad25e1  590.48.01   2025-12-18 09:16:33 -08:00
Maneet Singh  a5bfb10e75  590.44.01   2025-12-02 15:32:25 -08:00
Maneet Singh  2af9f1f0f7  580.105.08  2025-11-04 12:45:59 -08:00
971 changed files with 423839 additions and 408458 deletions

View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 580.94.11.
version 590.48.01.
## How to Build
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
580.94.11 driver release. This can be achieved by installing
590.48.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
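For illustration only (this line is not part of the diff hunk above), installing the matching user-space driver while skipping its prebuilt kernel modules would look roughly like the following; the exact .run file name is an assumption based on the version string:

    sh ./NVIDIA-Linux-x86_64-590.48.01.run --no-kernel-modules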
@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.11/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/590.48.01/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@@ -959,9 +959,13 @@ Subsystem Device ID.
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 1028 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 1028 227A |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 103C 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 103C 227A |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 10DE 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 10DE 227A |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 17AA 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 17AA 227A |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
@@ -1008,6 +1012,7 @@ Subsystem Device ID.
| NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 |
| NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 |
| NVIDIA RTX PRO 500 Blackwell Embedded GPU | 2DF9 |
| NVIDIA GB10 | 2E12 10DE 21EC |
| NVIDIA GeForce RTX 5070 | 2F04 |
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
| NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 |

View File

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"580.94.11\"
ccflags-y += -DNV_VERSION_STRING=\"590.48.01\"
# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
@@ -187,6 +187,7 @@ NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
NV_CONFTEST_CFLAGS += $(call cc-option,-fms-extensions,)
NV_CONFTEST_CFLAGS += -Wno-error
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h

View File

@@ -0,0 +1,53 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef SBIOS_TABLE_VERSION_H
#define SBIOS_TABLE_VERSION_H
#define CONTROLLER_SBIOS_TABLE_VERSION_10 (0x10)
#define CONTROLLER_SBIOS_TABLE_VERSION_20 (0x20)
#define CONTROLLER_SBIOS_TABLE_VERSION_21 (0x21)
#define CONTROLLER_SBIOS_TABLE_VERSION_22 (0x22)
#define CONTROLLER_SBIOS_TABLE_VERSION_23 (0x23)
#define CONTROLLER_SBIOS_TABLE_VERSION_24 (0x24)
#define CONTROLLER_SBIOS_TABLE_VERSION_25 (0x25)
#define CONTROLLER_SBIOS_TABLE_MAX_ENTRIES (8)
// NOTE: When adding a new version, make sure to update MAX_VERSION accordingly.
#define CONTROLLER_SBIOS_TABLE_MAX_VERSION (0x25)
/*!
* Layout of Controller 2x data used for static config
*/
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_20 (0x20)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_21 (0x21)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22 (0x22)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23 (0x23)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24 (0x24)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25 (0x25)
#define NVPCF_CONTROLLER_STATIC_TABLE_MAX_ENTRIES (8)
// NOTE: When adding a new version, make sure to update MAX_VERSION accordingly.
#define NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION (0x25)
#endif // SBIOS_TABLE_VERSION_H
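The NOTE comments above ask maintainers to keep the MAX_VERSION defines in sync with the newest version define; a hypothetical compile-time guard (not part of this change) that would enforce that could look like:

    /* Hypothetical sketch: fail the build if a MAX_VERSION define falls out of sync. */
    #if CONTROLLER_SBIOS_TABLE_MAX_VERSION != CONTROLLER_SBIOS_TABLE_VERSION_25
    #error "CONTROLLER_SBIOS_TABLE_MAX_VERSION is out of sync with the newest version define"
    #endif
    #if NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION != NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25
    #error "NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION is out of sync with the newest version define"
    #endif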

View File

@@ -24,7 +24,6 @@
#define __NV_HASH_H__
#include "conftest.h"
#include "nv-list-helpers.h"
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>

View File

@@ -26,8 +26,10 @@
#define NV_IOCTL_H
#include <nv-ioctl-numbers.h>
#include <nv-ioctl-numa.h>
#include <nvtypes.h>
typedef struct {
NvU32 domain; /* PCI domain number */
NvU8 bus; /* PCI bus number */
@@ -113,7 +115,7 @@ typedef struct nv_ioctl_query_device_intr
{
NvU32 intrStatus NV_ALIGN_BYTES(4);
NvU32 status;
} nv_ioctl_query_device_intr;
} nv_ioctl_query_device_intr_t;
/* system parameters that the kernel driver may use for configuration */
typedef struct nv_ioctl_sys_params

View File

@@ -102,17 +102,6 @@
#include <linux/dma-buf.h>
#endif
#if defined(NV_DRM_AVAILABLE)
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_gem.h>
#endif /* NV_DRM_AVAILABLE */
/* task and signal-related items */
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
@@ -141,8 +130,6 @@
#include <asm/bitops.h> /* __set_bit() */
#include <linux/time.h> /* FD_SET() */
#include "nv-list-helpers.h"
/*
* Use current->cred->euid, instead of calling current_euid().
* The latter can pull in the GPL-only debug_lockdep_rcu_enabled()
@@ -274,14 +261,8 @@ extern int nv_pat_mode;
user_function, NULL, args)
#endif
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
#define NV_CONFIG_PREEMPT_RT 1
#endif
#define NV_PAGE_COUNT(page) \
((unsigned int)page_count(page))
#define NV_GET_PAGE_FLAGS(page_ptr) \
(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
#if !defined(DEBUG) && defined(__GFP_NOWARN)
#define NV_GFP_KERNEL (GFP_KERNEL | __GFP_NOWARN)
@@ -298,9 +279,9 @@ extern int nv_pat_mode;
* such as Linux/x86-64; the alternative is to use an IOMMU such
* as the one implemented with the K8 GART, if available.
*/
#define NV_GFP_DMA32 (NV_GFP_KERNEL | GFP_DMA32)
#define NV_GFP_DMA32 (GFP_DMA32)
#else
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
#define NV_GFP_DMA32 0
#endif
#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
@@ -388,11 +369,7 @@ static inline void nv_vfree(void *ptr, NvU64 size)
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
void *ptr = ioremap_driver_hardened(phys, size);
#else
void *ptr = ioremap(phys, size);
#endif
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
@@ -405,9 +382,7 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
#if defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#else
return nv_ioremap(phys, size);
@@ -421,9 +396,7 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
ptr = ioremap_driver_hardened_wc(phys, size);
#elif defined(NV_IOREMAP_WC_PRESENT)
#if defined(NV_IOREMAP_WC_PRESENT)
ptr = ioremap_wc(phys, size);
#else
return nv_ioremap_nocache(phys, size);
@@ -465,13 +438,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
NV_MEMDBG_ADD(ptr, size); \
}
#if defined(__GFP_RETRY_MAYFAIL)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_RETRY_MAYFAIL)
#elif defined(__GFP_NORETRY)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_NORETRY)
#else
#define NV_GFP_NO_OOM (NV_GFP_KERNEL)
#endif
#define NV_KMALLOC_NO_OOM(ptr, size) \
{ \
@@ -528,22 +495,12 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
#endif
#endif
#define NV_GET_CURRENT_PROCESS() current->tgid
#define NV_IN_ATOMIC() in_atomic()
#define NV_COPY_TO_USER(to, from, n) copy_to_user(to, from, n)
#define NV_COPY_FROM_USER(to, from, n) copy_from_user(to, from, n)
#define NV_IS_SUSER() capable(CAP_SYS_ADMIN)
#define NV_CLI() local_irq_disable()
#define NV_SAVE_FLAGS(eflags) local_save_flags(eflags)
#define NV_RESTORE_FLAGS(eflags) local_irq_restore(eflags)
#define NV_MAY_SLEEP() (!irqs_disabled() && !in_interrupt() && !NV_IN_ATOMIC())
#define NV_MAY_SLEEP() (!irqs_disabled() && !in_interrupt() && !in_atomic())
#define NV_MODULE_PARAMETER(x) module_param(x, int, 0)
#define NV_MODULE_STRING_PARAMETER(x) module_param(x, charp, 0)
#undef MODULE_PARM
#define NV_NUM_CPUS() num_possible_cpus()
#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
#if defined(NVCPU_X86_64) && \
@@ -596,7 +553,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#endif
}
#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
#define NV_VMA_PGOFF(vma) ((vma)->vm_pgoff)
#define NV_VMA_SIZE(vma) ((vma)->vm_end - (vma)->vm_start)
@@ -693,9 +649,9 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#define NV_PRINT_AT(nv_debug_level,at) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %lld, " \
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
at->num_pages, (long long)atomic64_read(&at->usage_count), \
at->page_table); \
}
@@ -711,13 +667,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
# define minor(x) MINOR(x)
#endif
#if !defined(PCI_COMMAND_SERR)
#define PCI_COMMAND_SERR 0x100
#endif
#if !defined(PCI_COMMAND_INTX_DISABLE)
#define PCI_COMMAND_INTX_DISABLE 0x400
#endif
#ifndef PCI_CAP_ID_EXP
#define PCI_CAP_ID_EXP 0x10
#endif
@@ -970,7 +919,7 @@ struct nv_dma_buf
typedef struct nv_alloc_s {
struct nv_alloc_s *next;
struct device *dev;
atomic_t usage_count;
atomic64_t usage_count;
struct {
NvBool contig : 1;
NvBool guest : 1;
@@ -983,6 +932,7 @@ typedef struct nv_alloc_s {
NvBool unencrypted : 1;
NvBool coherent : 1;
NvBool carveout : 1;
NvBool pool : 1;
} flags;
unsigned int cache_type;
unsigned int num_pages;
@@ -1143,14 +1093,18 @@ typedef struct nv_dma_map_s {
i++, sm = &dm->mapping.discontig.submaps[i])
/*
* On 4K ARM kernels, use max submap size a multiple of 64K to keep nv-p2p happy.
* Despite 4K OS pages, we still use 64K P2P pages due to dependent modules still using 64K.
* Instead of using (4G-4K), use max submap size as (4G-64K) since the mapped IOVA range
* must be aligned at 64K boundary.
* On 4K ARM kernels, use max submap size a multiple of 2M to avoid breaking up 2M page size
* sysmem allocations.
*
* Instead of using (4G-4K), use max submap size as (4G-2M) since the mapped IOVA range
* must be aligned at 2M boundary.
*
* Bug 5401803: Tracks migrating away from making IOMMU mappings using submaps in favor of
* using sg_chain() to chain a single large scatterlist.
*/
#if defined(CONFIG_ARM64_4K_PAGES)
#define NV_DMA_U32_MAX_4K_PAGES ((NvU32)((NV_U32_MAX >> PAGE_SHIFT) + 1))
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 16))
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 512))
#else
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
#endif
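A quick sanity check of the new constant, derived only from the defines above (4K pages, PAGE_SHIFT = 12):

    NV_DMA_U32_MAX_4K_PAGES = (0xFFFFFFFF >> 12) + 1 = 0x100000 pages   (4 GiB of 4K pages)
    NV_DMA_SUBMAP_MAX_PAGES = 0x100000 - 512         = 0xFFE00 pages    (4 GiB - 2 MiB)

Subtracting 512 pages (512 x 4 KiB = 2 MiB) instead of the previous 16 pages (64 KiB) keeps each full submap a multiple of 2 MiB, matching the updated comment about 2M-aligned IOVA ranges.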
@@ -1294,7 +1248,8 @@ struct nv_pci_tegra_devfreq_dev;
typedef struct nv_linux_state_s {
nv_state_t nv_state;
atomic_t usage_count;
atomic64_t usage_count;
NvU32 suspend_count;
struct device *dev;
@@ -1470,6 +1425,8 @@ typedef struct nv_linux_state_s {
int (*devfreq_suspend)(struct device *dev);
int (*devfreq_resume)(struct device *dev);
int (*devfreq_enable_boost)(struct device *dev, unsigned int duration);
int (*devfreq_disable_boost)(struct device *dev);
#endif
} nv_linux_state_t;
@@ -1640,6 +1597,7 @@ extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_RegisterPlatformDeviceDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 NVreg_TegraGpuPgMask;
extern NvU32 NVreg_EnableNonblockingOpen;
extern NvU32 num_probed_nv_devices;
@@ -1669,9 +1627,9 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
{
NV_PRINT_AT(NV_DBG_MEMINFO, at);
if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
if (atomic64_dec_and_test(&at->usage_count))
{
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
at->next = nvlfp->free_list;
nvlfp->free_list = at;
@@ -1697,10 +1655,7 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
return flags;
}
#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
#define MODULE_NAME "nvidia"
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
NV_STATUS nv_imp_icc_get(nv_state_t *nv);

View File

@@ -26,45 +26,12 @@
#include <linux/list.h>
#include "conftest.h"
/*
* list_first_entry_or_null added by commit 6d7581e62f8b ("list: introduce
* list_first_entry_or_null") in v3.10 (2013-05-29).
*/
#if !defined(list_first_entry_or_null)
#define list_first_entry_or_null(ptr, type, member) \
(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
#endif
/*
* Added by commit 93be3c2eb337 ("list: introduce list_last_entry(), use
* list_{first,last}_entry()") in v3.13 (2013-11-12).
*/
#if !defined(list_last_entry)
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
#endif
/* list_last_entry_or_null() doesn't actually exist in the kernel */
#if !defined(list_last_entry_or_null)
#define list_last_entry_or_null(ptr, type, member) \
(!list_empty(ptr) ? list_last_entry(ptr, type, member) : NULL)
#endif
/*
* list_prev_entry() and list_next_entry added by commit 008208c6b26f
* ("list: introduce list_next_entry() and list_prev_entry()") in
* v3.13 (2013-11-12).
*/
#if !defined(list_prev_entry)
#define list_prev_entry(pos, member) \
list_entry((pos)->member.prev, typeof(*(pos)), member)
#endif
#if !defined(list_next_entry)
#define list_next_entry(pos, member) \
list_entry((pos)->member.next, typeof(*(pos)), member)
#endif
#if !defined(NV_LIST_IS_FIRST_PRESENT)
static inline int list_is_first(const struct list_head *list,
const struct list_head *head)

View File

@@ -32,18 +32,6 @@
#include <linux/semaphore.h>
#include <linux/sched/signal.h> /* signal_pending */
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
typedef raw_spinlock_t nv_spinlock_t;
#define NV_DEFINE_SPINLOCK(lock) DEFINE_RAW_SPINLOCK(lock)
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) raw_spin_lock_irq(lock)
#define NV_SPIN_UNLOCK_IRQ(lock) raw_spin_unlock_irq(lock)
#define NV_SPIN_LOCK_IRQSAVE(lock,flags) raw_spin_lock_irqsave(lock,flags)
#define NV_SPIN_UNLOCK_IRQRESTORE(lock,flags) raw_spin_unlock_irqrestore(lock,flags)
#define NV_SPIN_LOCK(lock) raw_spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) raw_spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
#else
typedef spinlock_t nv_spinlock_t;
#define NV_DEFINE_SPINLOCK(lock) DEFINE_SPINLOCK(lock)
#define NV_SPIN_LOCK_INIT(lock) spin_lock_init(lock)
@@ -54,7 +42,6 @@ typedef spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK(lock) spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
#endif
#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)

View File

@@ -196,14 +196,33 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
* Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write().
*/
void nv_vma_start_write(struct vm_area_struct *);
static inline void nv_vma_flags_set_word(struct vm_area_struct *vma, unsigned long flags)
{
nv_vma_start_write(vma);
#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT)
vma_flags_set_word(&vma->flags, flags);
#else
ACCESS_PRIVATE(vma, __vm_flags) |= flags;
#endif
}
static inline void nv_vma_flags_clear_word(struct vm_area_struct *vma, unsigned long flags)
{
nv_vma_start_write(vma);
#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT)
vma_flags_clear_word(&vma->flags, flags);
#else
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#endif
}
#endif // !NV_CAN_CALL_VMA_START_WRITE
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) |= flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
nv_vma_flags_set_word(vma, flags);
#elif defined(NV_VM_FLAGS_SET_PRESENT)
vm_flags_set(vma, flags);
#else
vma->vm_flags |= flags;
@@ -213,9 +232,8 @@ static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
nv_vma_flags_clear_word(vma, flags);
#elif defined(NV_VM_FLAGS_SET_PRESENT)
vm_flags_clear(vma, flags);
#else
vma->vm_flags &= ~flags;

View File

@@ -47,9 +47,6 @@ void NV_API_CALL nv_init_msi (nv_state_t *);
void NV_API_CALL nv_init_msix (nv_state_t *);
NvS32 NV_API_CALL nv_request_msix_irq (nv_linux_state_t *);
#define NV_PCI_MSIX_FLAGS 2
#define NV_PCI_MSIX_FLAGS_QSIZE 0x7FF
static inline void nv_free_msix_irq(nv_linux_state_t *nvl)
{
int i;
@@ -67,17 +64,8 @@ static inline int nv_get_max_irq(struct pci_dev *pci_dev)
NvU16 ctrl;
cap_ptr = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
/*
* The 'PCI_MSIX_FLAGS' was added in 2.6.21-rc3 by:
* 2007-03-05 f5f2b13129a6541debf8851bae843cbbf48298b7
*/
#if defined(PCI_MSIX_FLAGS)
pci_read_config_word(pci_dev, cap_ptr + PCI_MSIX_FLAGS, &ctrl);
nvec = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
#else
pci_read_config_word(pci_dev, cap_ptr + NV_PCI_MSIX_FLAGS, &ctrl);
nvec = (ctrl & NV_PCI_MSIX_FLAGS_QSIZE) + 1;
#endif
return nvec;
}

View File

@@ -73,31 +73,22 @@ extern NvBool nvos_is_chipset_io_coherent(void);
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot_modify(old_prot, 0, PTE_RDONLY)
#elif defined(NVCPU_X86_64)
#define NV_PGPROT_UNCACHED_WEAK(old_prot) pgprot_noncached_weak(old_prot)
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
pgprot_modify_writecombine(old_prot)
#define NV_PGPROT_WRITE_COMBINED(old_prot) \
NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
pgprot_modify_writecombine(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot(pgprot_val((old_prot)) & ~_PAGE_RW)
#elif defined(NVCPU_RISCV64)
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
#define NV_PGPROT_WRITE_COMBINED(old_prot) \
pgprot_writecombine(old_prot)
/* Don't attempt to mark sysmem pages as write combined on riscv */
#define NV_PGPROT_WRITE_COMBINED(old_prot) old_prot
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot(pgprot_val((old_prot)) & ~_PAGE_WRITE)
#else
/* Writecombine is not supported */
#undef NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
#undef NV_PGPROT_WRITE_COMBINED(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot)
#endif

View File

@@ -57,8 +57,6 @@ int nv_uvm_init (void);
void nv_uvm_exit (void);
NV_STATUS nv_uvm_suspend (void);
NV_STATUS nv_uvm_resume (void);
void nv_uvm_notify_start_device (const NvU8 *uuid);
void nv_uvm_notify_stop_device (const NvU8 *uuid);
NV_STATUS nv_uvm_event_interrupt (const NvU8 *uuid);
NV_STATUS nv_uvm_drain_P2P (const NvU8 *uuid);
NV_STATUS nv_uvm_resume_P2P (const NvU8 *uuid);

View File

@@ -36,6 +36,19 @@
#define NV_MAX_ISR_DELAY_MS (NV_MAX_ISR_DELAY_US / 1000)
#define NV_NSECS_TO_JIFFIES(nsec) ((nsec) * HZ / 1000000000)
/*
* in_hardirq() was added in v5.11-rc1 (2020-12-15) to replace in_irq().
* Fall back to in_irq() for older kernels that don't have in_hardirq().
*/
static inline NvBool nv_in_hardirq(void)
{
#if defined(in_hardirq)
return in_hardirq();
#else
return in_irq();
#endif
}
#if !defined(NV_KTIME_GET_RAW_TS64_PRESENT)
static inline void ktime_get_raw_ts64(struct timespec64 *ts64)
{
@@ -82,7 +95,7 @@ static inline NV_STATUS nv_sleep_us(unsigned int us)
ktime_get_raw_ts64(&tm1);
#endif
if (in_irq() && (us > NV_MAX_ISR_DELAY_US))
if (nv_in_hardirq() && (us > NV_MAX_ISR_DELAY_US))
return NV_ERR_GENERIC;
mdelay_safe_msec = us / 1000;
@@ -127,7 +140,7 @@ static inline NV_STATUS nv_sleep_ms(unsigned int ms)
tm_start = tm_aux;
#endif
if (in_irq() && (ms > NV_MAX_ISR_DELAY_MS))
if (nv_in_hardirq() && (ms > NV_MAX_ISR_DELAY_MS))
{
return NV_ERR_GENERIC;
}

View File

@@ -86,6 +86,8 @@ extern const NvBool nv_is_rm_firmware_supported_os;
#define NV_RM_DEVICE_INTR_ADDRESS 0x100
#define NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT 0xFFFFFFFF
/*
* Clock domain identifier, which is used for fetching the engine
* load backed by the specified clock domain for Tegra platforms
@@ -413,6 +415,7 @@ typedef struct nv_soc_irq_info_s {
#define NV_MAX_SOC_IRQS 10
#define NV_MAX_DPAUX_NUM_DEVICES 4
#define NV_MAX_DPAUX_DEV_NAME_SIZE 10
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 4
@@ -429,6 +432,12 @@ typedef struct nv_phys_addr_range
NvU64 len;
} nv_phys_addr_range_t;
typedef struct
{
char vbios_version[15];
char firmware_version[64];
} nv_cached_gpu_info_t;
typedef struct nv_state_t
{
void *priv; /* private data */
@@ -465,6 +474,7 @@ typedef struct nv_state_t
NvU32 num_dpaux_instance;
NvU32 interrupt_line;
NvU32 dpaux_irqs[NV_MAX_DPAUX_NUM_DEVICES];
char dpaux_devname[NV_MAX_DPAUX_NUM_DEVICES][NV_MAX_DPAUX_DEV_NAME_SIZE];
nv_soc_irq_info_t soc_irq_info[NV_MAX_SOC_IRQS];
NvS32 current_soc_irq;
NvU32 num_soc_irqs;
@@ -481,6 +491,7 @@ typedef struct nv_state_t
NvBool is_tegra_pci_igpu;
NvBool supports_tegra_igpu_rg;
NvBool is_tegra_pci_igpu_rg_enabled;
NvU32 tegra_pci_igpu_pg_mask;
NvBool primary_vga;
@@ -588,8 +599,12 @@ typedef struct nv_state_t
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
/* Bool to check if power management is unsupported */
/* Struct to cache the gpu info details */
nv_cached_gpu_info_t cached_gpu_info;
/* Bool to check if power management is supported */
NvBool is_pm_unsupported;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@@ -651,7 +666,7 @@ typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef struct UvmGpuAccessBitsBufferAlloc_tag *nvgpuAccessBitBufferAlloc_t;
/*
* flags
*/
@@ -988,6 +1003,7 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
void NV_API_CALL nv_set_gpu_pg_mask(nv_state_t *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
@@ -1107,16 +1123,15 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId, NvU16 subsystemId);
NV_STATUS NV_API_CALL rm_pmu_perfmon_get_load (nvidia_stack_t *, nv_state_t *, NvU32 *, TEGRASOC_DEVFREQ_CLK);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_save_low_res_mode (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_get_vbios_version (nvidia_stack_t *, nv_state_t *, char *);
char* NV_API_CALL rm_get_gpu_uuid (nvidia_stack_t *, nv_state_t *);
const NvU8* NV_API_CALL rm_get_gpu_uuid_raw (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_set_rm_firmware_requested(nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_get_firmware_version (nvidia_stack_t *, nv_state_t *, char *, NvLength);
void NV_API_CALL rm_cleanup_file_private (nvidia_stack_t *, nv_state_t *, nv_file_private_t *);
void NV_API_CALL rm_unbind_lock (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_read_registry_dword (nvidia_stack_t *, nv_state_t *, const char *, NvU32 *);

View File

@@ -33,6 +33,12 @@ typedef NvU32 MIGDeviceId;
#define NO_MIG_DEVICE 0L
/* Convert a MIGDeviceId into a 0-based per-GPU subdevice index. */
#define MIG_DEVICE_ID_SUBDEV_MASK 0xf0000000
#define MIG_DEVICE_ID_SUBDEV_SHIFT 28
#define MIG_DEVICE_ID_TO_SUBDEV(migDeviceId) (((migDeviceId) & MIG_DEVICE_ID_SUBDEV_MASK) >> MIG_DEVICE_ID_SUBDEV_SHIFT)
#ifdef __cplusplus
}
#endif
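A small worked example of the new conversion macro, assuming the MIGDeviceId typedef and the defines from the hunk above (the ID value is arbitrary and purely illustrative):

    /* Worked example: the subdevice index lives in the top nibble of the ID. */
    static NvU32 exampleSubdevIndex(void)
    {
        MIGDeviceId id = 0x30000005;            /* hypothetical example value */
        return MIG_DEVICE_ID_TO_SUBDEV(id);     /* (0x30000005 & 0xf0000000) >> 28 == 3 */
    }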

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,7 +29,7 @@
#define _NV_UVM_INTERFACE_H_
// Forward references, to break circular header file dependencies:
struct UvmOpsUvmEvents;
struct UvmEventsLinux;
#if defined(NVIDIA_UVM_ENABLED)
@@ -1008,6 +1008,65 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable);
/*******************************************************************************
nvUvmInterfaceAccessBitsBufAlloc
This function allocates a buffer for access bits.
Arguments:
device[IN] - Device handle associated with the gpu
pAccessBitsInfo[OUT] - Information provided by RM for access bits handling
Error codes:
NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
NV_ERR_NO_MEMORY - If the memory allocation fails.
NV_ERR_GENERIC - Unexpected error. We try hard to
avoid returning this error code,
because it is not very informative.
*/
NV_STATUS nvUvmInterfaceAccessBitsBufAlloc(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo);
/*******************************************************************************
nvUvmInterfaceAccessBitsBufFree
This function frees the buffer used for access bits.
Arguments:
device[IN] - Device handle associated with the gpu
pAccessBitsInfo[IN] - Information containing the access bits buffer handle to be freed
Error codes:
NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
NV_ERR_GENERIC - Unexpected error. We try hard to
avoid returning this error code,
because it is not very informative.
*/
NV_STATUS nvUvmInterfaceAccessBitsBufFree(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo);
/*******************************************************************************
nvUvmInterfaceAccessBitsDump
This function gets the access bits information in accordance with the mode
requested and stores it in the buffer provided by the client.
Arguments:
device[IN] - Device handle associated with the gpu
pAccessBitsInfo[IN/OUT] - Information containing the access bits buffer
handle to be used for dumping the access bits
and the buffer where the dumped data will be stored
mode[IN] - Mode in which the access bits are dumped
Error codes:
NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
NV_ERR_GENERIC - Unexpected error. We try hard to
avoid returning this error code,
because it is not very informative.
*/
NV_STATUS nvUvmInterfaceAccessBitsDump(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo,
UVM_ACCESS_BITS_DUMP_MODE mode);
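Taken together, the three new entry points suggest an alloc / dump / free flow. A minimal sketch, assuming a valid uvmGpuDeviceHandle and the UvmGpuAccessBitsBufferAlloc and UVM_ACCESS_BITS_DUMP_MODE definitions added elsewhere in this change:

    /* Minimal sketch: allocate, dump once in aggregate mode, then free. */
    static void dumpAccessBitsOnce(uvmGpuDeviceHandle device)
    {
        UvmGpuAccessBitsBufferAlloc accessBits = { 0 };

        if (nvUvmInterfaceAccessBitsBufAlloc(device, &accessBits) != NV_OK)
            return;

        /* Pull an aggregate snapshot of the access bits into the buffer. */
        (void)nvUvmInterfaceAccessBitsDump(device, &accessBits,
                                           UVM_ACCESS_BITS_DUMP_MODE_AGGREGATE);

        nvUvmInterfaceAccessBitsBufFree(device, &accessBits);
    }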
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo
@@ -1086,20 +1145,20 @@ NV_STATUS nvUvmInterfaceDisableAccessCntr(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo);
//
// Called by the UVM driver to register operations with RM. Only one set of
// Called by the UVM driver to register event callbacks with RM. Only one set of
// callbacks can be registered by any driver at a time. If another set of
// callbacks was already registered, NV_ERR_IN_USE is returned.
//
NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvmOps);
NV_STATUS nvUvmInterfaceRegisterUvmEvents(struct UvmEventsLinux *importedEvents);
//
// Counterpart to nvUvmInterfaceRegisterUvmCallbacks. This must only be called
// if nvUvmInterfaceRegisterUvmCallbacks returned NV_OK.
// Counterpart to nvUvmInterfaceRegisterUvmEvents. This must only be called if
// nvUvmInterfaceRegisterUvmEvents returned NV_OK.
//
// Upon return, the caller is guaranteed that any outstanding callbacks are done
// and no new ones will be invoked.
//
void nvUvmInterfaceDeRegisterUvmOps(void);
void nvUvmInterfaceDeRegisterUvmEvents(void);
/*******************************************************************************
nvUvmInterfaceGetNvlinkInfo

View File

@@ -221,9 +221,11 @@ typedef struct UvmGpuChannelInstanceInfo_tag
// Ampere+ GPUs
volatile NvU32 *pChramChannelRegister;
// Out: Address of the Runlist PRI Base Register required to ring the
// doorbell after clearing the faulted bit.
volatile NvU32 *pRunlistPRIBaseRegister;
// Out: Address of the doorbell.
volatile NvU32 *workSubmissionOffset;
// Out: channel handle required to ring the doorbell.
NvU32 workSubmissionToken;
// Out: SMC engine id to which the GR channel is bound, or zero if the GPU
// does not support SMC or it is a CE channel
@@ -365,6 +367,9 @@ typedef struct
// True if the CE supports encryption
NvBool secure:1;
// True if the CE can be used for fast scrub
NvBool scrub:1;
// Mask of physical CEs assigned to this LCE
//
// The value returned by RM for this field may change when a GPU is
@@ -383,6 +388,7 @@ typedef enum
{
UVM_LINK_TYPE_NONE,
UVM_LINK_TYPE_PCIE,
UVM_LINK_TYPE_PCIE_BAR1,
UVM_LINK_TYPE_NVLINK_1,
UVM_LINK_TYPE_NVLINK_2,
UVM_LINK_TYPE_NVLINK_3,
@@ -539,6 +545,12 @@ typedef struct UvmGpuP2PCapsParams_tag
// Size is 0 if bar1 p2p is not supported.
NvU64 bar1DmaAddress[2];
NvU64 bar1DmaSize[2];
// True if GPU i can use PCIe atomics on locations in GPU[i-1]
// BAR1. This implies that GPU[i] can issue PCIe atomics,
// GPU[i-1] can accept PCIe atomics, and the bus interconnect
// between the two GPUs can correctly route PCIe atomics.
NvBool bar1PcieAtomics[2];
} UvmGpuP2PCapsParams;
// Platform-wide information
@@ -830,11 +842,7 @@ typedef NV_STATUS (*uvmEventServiceInterrupt_t) (void *pDeviceObject,
NV_OK if the UVM driver handled the interrupt
NV_ERR_NO_INTR_PENDING if the interrupt is not for the UVM driver
*/
#if defined (__linux__)
typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct);
#else
typedef void (*uvmEventIsrTopHalf_t) (void);
#endif
/*******************************************************************************
uvmEventDrainP2P
@@ -871,20 +879,24 @@ typedef NV_STATUS (*uvmEventDrainP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
*/
typedef NV_STATUS (*uvmEventResumeP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
struct UvmOpsUvmEvents
struct UvmEventsLinux
{
uvmEventIsrTopHalf_t isrTopHalf;
uvmEventSuspend_t suspend;
uvmEventResume_t resume;
uvmEventDrainP2P_t drainP2P;
uvmEventResumeP2P_t resumeP2P;
};
struct UvmEventsWindows
{
uvmEventSuspend_t suspend;
uvmEventResume_t resume;
uvmEventStartDevice_t startDevice;
uvmEventStopDevice_t stopDevice;
uvmEventIsrTopHalf_t isrTopHalf;
uvmEventStopDevice_t stopDevice;
#if defined (_WIN32)
uvmEventWddmResetDuringTimeout_t wddmResetDuringTimeout;
uvmEventWddmRestartAfterTimeout_t wddmRestartAfterTimeout;
uvmEventServiceInterrupt_t serviceInterrupt;
#endif
uvmEventDrainP2P_t drainP2P;
uvmEventResumeP2P_t resumeP2P;
};
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
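As a rough sketch of how the renamed registration API from this header might be used: the handler names below are hypothetical, and their signatures are assumed to follow the uvmEvent*_t typedefs above.

    /* Hypothetical UVM-side handlers wired into the renamed Linux events struct. */
    static struct UvmEventsLinux uvmEvents =
    {
        .isrTopHalf = uvmIsrTopHalfEntry,
        .suspend    = uvmSuspendEntry,
        .resume     = uvmResumeEntry,
        .drainP2P   = uvmDrainP2PEntry,
        .resumeP2P  = uvmResumeP2PEntry,
    };

    static NV_STATUS uvmRegisterWithRm(void)
    {
        /* Only one set of callbacks may be registered at a time (NV_ERR_IN_USE otherwise). */
        return nvUvmInterfaceRegisterUvmEvents(&uvmEvents);
    }

    static void uvmUnregisterFromRm(void)
    {
        /* Must only be called if registration returned NV_OK. */
        nvUvmInterfaceDeRegisterUvmEvents();
    }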
@@ -1043,6 +1055,22 @@ typedef struct UvmGpuAccessCntrConfig_tag
NvU32 threshold;
} UvmGpuAccessCntrConfig;
typedef enum
{
UVM_ACCESS_BITS_DUMP_MODE_AGGREGATE = 0,
UVM_ACCESS_BITS_DUMP_MODE_DIFF = 1,
UVM_ACCESS_BITS_DUMP_MODE_CURRENT = 2,
} UVM_ACCESS_BITS_DUMP_MODE;
typedef struct UvmGpuAccessBitsBufferAlloc_tag
{
NvHandle accessBitsBufferHandle;
NvBool bDirtyBits;
NvU32 granularity;
NV_DECLARE_ALIGNED(NvU64 enabledMask[64], 8);
NV_DECLARE_ALIGNED(NvU64 currentBits[64], 8);
} UvmGpuAccessBitsBufferAlloc;
//
// When modifying this enum, make sure they are compatible with the mirrored
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
@@ -1080,6 +1108,7 @@ typedef UvmGpuPagingChannel *gpuPagingChannelHandle;
typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
typedef UvmGpuAccessBitsBufferAlloc gpuAccessBitsBufferAlloc;
typedef struct UvmCslIv
{

View File

@@ -142,6 +142,8 @@ typedef enum {
UvmGpuCompressionTypeCount = 2
} UvmGpuCompressionType;
#define UVM_PMA_MAX_LOCALIZED_REGION_COUNT 2
//
// Mirrored in PMA (PMA_STATS)
//
@@ -153,6 +155,9 @@ typedef struct UvmPmaStatistics_tag
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
volatile NvU64 numPages2mLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable 2MB pages count per-uGPU
volatile NvU64 numFreePages64kLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable free 64KB page count per-uGPU
volatile NvU64 numFreePages2mLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable free 2MB pages count per-uGPU
} UvmPmaStatistics;
typedef enum

View File

@@ -174,10 +174,7 @@ struct NvKmsKapiDeviceResourcesInfo {
NvBool supportsSyncpts;
NvBool requiresVrrSemaphores;
NvBool supportsInputColorRange;
NvBool supportsInputColorSpace;
NvBool contiguousPhysicalMappings;
} caps;
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
@@ -451,7 +448,6 @@ struct NvKmsKapiHeadReplyConfig {
struct NvKmsKapiModeSetReplyConfig {
enum NvKmsFlipResult flipResult;
NvBool vrrFlip;
NvS32 vrrSemaphoreIndex;
struct NvKmsKapiHeadReplyConfig
headReplyConfig[NVKMS_KAPI_MAX_HEADS];
};
@@ -1550,22 +1546,6 @@ struct NvKmsKapiFunctionsTable {
NvU32 semaphoreIndex
);
/*!
* Signal the VRR semaphore at the specified index from the CPU.
* If device does not support VRR semaphores, this is a no-op.
* Returns true if signal is success or no-op, otherwise returns false.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] index The VRR semaphore index to be signalled.
*/
NvBool
(*signalVrrSemaphore)
(
struct NvKmsKapiDevice *device,
NvS32 index
);
/*!
* Check or wait on a head's LUT notifier.
*

View File

@@ -213,6 +213,7 @@ int NV_API_CALL os_nv_cap_validate_and_dup_fd (const nv_cap_t *, int
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
NV_STATUS NV_API_CALL os_tegra_igpu_perf_boost (void *, NvBool, NvU32);
NV_STATUS NV_API_CALL os_get_tegra_platform (NvU32 *);
enum os_pci_req_atomics_type {

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -83,6 +83,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvg
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_buffer_alloc(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t);
NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_buffer_free(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t);
NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_dump(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t, UVM_ACCESS_BITS_DUMP_MODE);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);

View File

@@ -669,50 +669,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
;;
ioremap_driver_hardened)
#
# Determine if the ioremap_driver_hardened() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened(void) {
ioremap_driver_hardened();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_PRESENT" "" "functions"
;;
ioremap_driver_hardened_wc)
#
# Determine if the ioremap_driver_hardened_wc() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened_wc(void) {
ioremap_driver_hardened_wc();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT" "" "functions"
;;
ioremap_cache_shared)
#
# Determine if the ioremap_cache_shared() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_cache_shared(void) {
ioremap_cache_shared();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_CACHE_SHARED_PRESENT" "" "functions"
;;
dom0_kernel_present)
# Add config parameter if running on DOM0.
if [ -n "$VGX_BUILD" ]; then
@@ -1330,33 +1286,54 @@ compile_test() {
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
;;
drm_available)
# Determine if the DRM subsystem is usable
get_dev_pagemap_has_pgmap_arg)
#
# Determine if the get_dev_pagemap() function has an additional
# 'pgmap' argument.
#
# This argument was removed by commit dd57f5feb19a
# (mm/memremap: remove unused get_dev_pagemap() parameter)
# in linux-next, expected in v6.18.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <linux/memremap.h>
#include <drm/drm_drv.h>
struct dev_pagemap *get_dev_pagemap_has_pgmap_arg(void) {
struct dev_pagemap *pgmap;
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
#error DRM not enabled
#endif
void conftest_drm_available(void) {
struct drm_driver drv;
/* 2013-10-02 1bb72532ac260a2d3982b40bdd4c936d779d0d16 */
(void)drm_dev_alloc;
/* 2013-10-02 c22f0ace1926da399d9a16dfaf09174c1b03594c */
(void)drm_dev_register;
/* 2013-10-02 c3a49737ef7db0bdd4fcf6cf0b7140a883e32b2a */
(void)drm_dev_unregister;
get_dev_pagemap(0, pgmap);
}"
compile_check_conftest "$CODE" "NV_DRM_AVAILABLE" "" "generic"
compile_check_conftest "$CODE" "NV_GET_DEV_PAGEMAP_HAS_PGMAP_ARG" "" "types"
;;
drm_sysfs_connector_property_event)
#
# Determine if drm_sysfs_connector_property_event() is present.
#
# Commit 0cf8d292ba5e ("drm/sysfs: rename drm_sysfs_connector_status_event()")
# renamed drm_sysfs_connector_status_event() to
# drm_sysfs_connector_property_event() in Linux v6.5.
#
CODE="
#include <drm/drm_sysfs.h>
void conftest_drm_sysfs_connector_property_event(void) {
drm_sysfs_connector_property_event();
}"
compile_check_conftest "$CODE" "NV_DRM_SYSFS_CONNECTOR_PROPERTY_EVENT_PRESENT" "" "functions"
;;
drm_sysfs_connector_status_event)
#
# Determine if drm_sysfs_connector_status_event() is present.
#
#
CODE="
#include <drm/drm_sysfs.h>
void conftest_drm_sysfs_connector_status_event(void) {
drm_sysfs_connector_status_event();
}"
compile_check_conftest "$CODE" "NV_DRM_SYSFS_CONNECTOR_STATUS_EVENT_PRESENT" "" "functions"
;;
pde_data)
@@ -1437,71 +1414,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VMF_INSERT_PFN_PROT_PRESENT" "" "functions"
;;
drm_atomic_available)
#
# Determine if the DRM atomic modesetting subsystem is usable
#
# Added by commit 036ef5733ba4
# ("drm/atomic: Allow drivers to subclass drm_atomic_state, v3") in
# v4.2 (2018-05-18).
#
# Make conftest more robust by adding test for
# drm_atomic_set_mode_prop_for_crtc(), this function added by
# commit 955f3c334f0f ("drm/atomic: Add MODE_ID property") in v4.2
# (2015-05-25). If the DRM atomic modesetting subsystem is
# back ported to Linux kernel older than v4.2, then commit
# 955f3c334f0f must be back ported in order to get NVIDIA-DRM KMS
# support.
# Commit 72fdb40c1a4b ("drm: extract drm_atomic_uapi.c") in v4.20
# (2018-09-05), moved drm_atomic_set_mode_prop_for_crtc() function
# prototype from drm/drm_atomic.h to drm/drm_atomic_uapi.h.
#
echo "$CONFTEST_PREAMBLE
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_atomic.h>
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
#error DRM not enabled
#endif
void conftest_drm_atomic_modeset_available(void) {
size_t a;
a = offsetof(struct drm_mode_config_funcs, atomic_state_alloc);
}" > conftest$$.c;
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "$CONFTEST_PREAMBLE
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_atomic.h>
#if defined(NV_DRM_DRM_ATOMIC_UAPI_H_PRESENT)
#include <drm/drm_atomic_uapi.h>
#endif
void conftest_drm_atomic_set_mode_prop_for_crtc(void) {
drm_atomic_set_mode_prop_for_crtc();
}" > conftest$$.c;
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#undef NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
else
echo "#define NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
fi
else
echo "#undef NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
fi
;;
drm_driver_has_legacy_dev_list)
#
# Determine if the 'drm_driver' structure has a 'legacy_dev_list' field.
@@ -2202,6 +2114,35 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GET_BACKLIGHT_DEVICE_BY_NAME_PRESENT" "" "functions"
;;
dma_map_ops_has_map_phys)
#
# Determine if .map_phys exists in struct dma_map_ops.
#
# Commit 14cb413af00c ("dma-mapping: remove unused mapping resource callbacks")
# removed .map_resource operation and replaced it with .map_phys.
#
echo "$CONFTEST_PREAMBLE
#include <linux/dma-map-ops.h>
int conftest_dma_map_ops_has_map_phys(void) {
return offsetof(struct dma_map_ops, map_phys);
}
int conftest_dma_map_ops_has_unmap_phys(void) {
return offsetof(struct dma_map_ops, unmap_phys);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#define NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types"
rm -f conftest$$.o
return
else
echo "#undef NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types"
return
fi
;;
dma_buf_ops_has_map)
#
# Determine if .map exists in dma_buf_ops.
@@ -2330,6 +2271,7 @@ compile_test() {
# drm_helper_mode_fill_fb_struct()") in linux-next
# (2025-07-16)
CODE="
#include <linux/stddef.h>
#include <drm/drm_modeset_helper.h>
void conftest_drm_fill_fb_struct_takes_format_info(void) {
@@ -2444,6 +2386,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PM_RUNTIME_AVAILABLE" "" "generic"
;;
pm_domain_available)
#
# Determine whether dev_pm_genpd_suspend() exists.
#
# This was added to the kernel in commit fc51989062138
# ("PM: domains: Rename pm_genpd_syscore_poweroff|poweron()")
# in v5.11-rc1 (2020-11-10),
#
CODE="
#include <linux/pm_domain.h>
void pm_domain_conftest(void) {
dev_pm_genpd_suspend();
}"
compile_check_conftest "$CODE" "NV_PM_DOMAIN_AVAILABLE" "" "functions"
;;
dma_direct_map_resource)
#
# Determine whether dma_is_direct() exists.
@@ -2617,31 +2576,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS" "" "types"
;;
drm_format_num_planes)
#
# Determine if drm_format_num_planes() function is present.
#
# The drm_format_num_planes() function was added by commit
# d0d110e09629 drm: Add drm_format_num_planes() utility function in
# v3.3 (2011-12-20). Prototype was moved from drm_crtc.h to
# drm_fourcc.h by commit ae4df11a0f53 (drm: Move format-related
# helpers to drm_fourcc.c) in v4.8 (2016-06-09).
# drm_format_num_planes() has been removed by commit 05c452c115bf
# (drm: Remove users of drm_format_num_planes) removed v5.3
# (2019-05-16).
#
CODE="
#include <drm/drm_crtc.h>
#include <drm/drm_fourcc.h>
void conftest_drm_format_num_planes(void) {
drm_format_num_planes();
}
"
compile_check_conftest "$CODE" "NV_DRM_FORMAT_NUM_PLANES_PRESENT" "" "functions"
;;
drm_gem_object_has_resv)
#
# Determine if the 'drm_gem_object' structure has a 'resv' field.
@@ -3712,6 +3646,90 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DEVM_CLK_BULK_GET_ALL_PRESENT" "" "functions"
;;
thermal_zone_for_each_trip)
#
# Determine if thermal_zone_for_each_trip() function is present
#
# Added by commit a56cc0a83385 ("thermal: core: Add function to
# walk trips under zone lock") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_zone_for_each_trip(void)
{
thermal_zone_for_each_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_ZONE_FOR_EACH_TRIP_PRESENT" "" "functions"
;;
thermal_bind_cdev_to_trip)
#
# Determine if thermal_bind_cdev_to_trip() function is present
#
# Added by commit d069ed6b752f ("thermal: core: Allow trip
# pointers to be used for cooling device binding") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_bind_cdev_to_trip(void)
{
thermal_bind_cdev_to_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_BIND_CDEV_TO_TRIP_PRESENT" "" "functions"
;;
thermal_unbind_cdev_from_trip)
#
# Determine if thermal_unbind_cdev_from_trip() function is present
#
# Added by commit d069ed6b752f ("thermal: core: Allow trip
# pointers to be used for cooling device binding") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_unbind_cdev_from_trip(void)
{
thermal_unbind_cdev_from_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_UNBIND_CDEV_FROM_TRIP_PRESENT" "" "functions"
;;
update_devfreq)
#
# Determine if update_devfreq() function is present
#
# Added by commit b596d895fa29 ("PM / devfreq: Make update_devfreq()
# public") in v4.20
#
CODE="
#include <linux/devfreq.h>
void conftest_update_devfreq(void)
{
update_devfreq();
}
"
compile_check_conftest "$CODE" "NV_UPDATE_DEVFREQ_PRESENT" "" "functions"
;;
devfreq_dev_profile_has_is_cooling_device)
#
# Determine if the 'devfreq_dev_profile' structure has 'is_cooling_device'
#
# Added by commit 1224451bb6f93 ("PM / devfreq: Register devfreq as a cooling device
# on demand") in v5.12-rc1
#
CODE="
#include <linux/devfreq.h>
int conftest_devfreq_dev_profile_has_is_cooling_device(void) {
return offsetof(struct devfreq_dev_profile, is_cooling_device);
}
"
compile_check_conftest "$CODE" "NV_DEVFREQ_DEV_PROFILE_HAS_IS_COOLING_DEVICE" "" "types"
;;
devfreq_has_freq_table)
#
# Determine if the 'devfreq' structure has 'freq_table'
@@ -3729,6 +3747,38 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DEVFREQ_HAS_FREQ_TABLE" "" "types"
;;
devfreq_has_suspend_freq)
#
# Determine if the 'devfreq' structure has 'suspend_freq'
#
# Commit 83f8ca45afbf ("PM / devfreq: add support for
# suspend/resume of a devfreq device") updated the devfreq
# structure and added the suspend_freq field in v5.0.
#
CODE="
#include <linux/devfreq.h>
int conftest_devfreq_has_suspend_freq(void) {
return offsetof(struct devfreq, suspend_freq);
}
"
compile_check_conftest "$CODE" "NV_DEVFREQ_HAS_SUSPEND_FREQ" "" "types"
;;
bpmp_mrq_has_strap_set)
#
# Determine if STRAP_SET is present in the bpmp MRQ ABI.
#
# STRAP_SET was added by commit 4bef358c9071 ("soc/tegra:
# bpmp: Update ABI header") in v5.0.
#
CODE="
#include <stdint.h>
#include <soc/tegra/bpmp-abi.h>
int bpmp_mrq_has_strap = STRAP_SET;
"
compile_check_conftest "$CODE" "NV_BPMP_MRQ_HAS_STRAP_SET" "" "types"
;;
dma_resv_add_fence)
#
# Determine if the dma_resv_add_fence() function is present.
@@ -3917,6 +3967,27 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT" "" "functions"
;;
pci_resize_resource_has_exclude_bars_arg)
#
# Determine if pci_resize_resource() has exclude_bars argument.
#
# exclude_bars argument was added to pci_resize_resource by commit
# 337b1b566db0 (11/14/2025) ("PCI: Fix restoring BARs on BAR resize rollback path")
# in linux-next.
#
CODE="
#include <linux/pci.h>
typeof(pci_resize_resource) conftest_pci_resize_resource_has_exclude_bars_arg;
int __must_check conftest_pci_resize_resource_has_exclude_bars_arg(struct pci_dev *dev,
int i, int size,
int exclude_bars) {
return 0;
}"
compile_check_conftest "$CODE" "NV_PCI_RESIZE_RESOURCE_HAS_EXCLUDE_BARS_ARG" "" "types"
;;
drm_connector_has_override_edid)
#
# Determine if 'struct drm_connector' has an 'override_edid' member.
@@ -3955,22 +4026,39 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG" "" "types"
;;
vm_area_struct_has_const_vm_flags)
vm_flags_set)
#
# Determine if the 'vm_area_struct' structure has
# const 'vm_flags'.
# Determine if the vm_flags_set() function is present. The
# presence of this function indicates that the vm_flags_clear()
# function is also present.
#
# A union of '__vm_flags' and 'const vm_flags' was added by
# The functions vm_flags_set()/ vm_flags_clear() were added by
# commit bc292ab00f6c ("mm: introduce vma->vm_flags wrapper
# functions") in v6.3.
# functions") in v6.3-rc1 (2023-02-09).
#
CODE="
#include <linux/mm_types.h>
int conftest_vm_area_struct_has_const_vm_flags(void) {
return offsetof(struct vm_area_struct, __vm_flags);
#include <linux/mm.h>
void conftest_vm_flags_set(void) {
vm_flags_set();
}"
compile_check_conftest "$CODE" "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" "" "types"
compile_check_conftest "$CODE" "NV_VM_FLAGS_SET_PRESENT" "" "functions"
;;
vma_flags_set_word)
#
# Determine if the vma_flags_set_word() function is present.
#
# Added by commit c3f7c506e8f1 ("mm: introduce VMA flags bitmap type")
# in v6.19-rc1.
#
CODE="
#include <linux/mm.h>
void conftest_vma_flags_set_word(void) {
vma_flags_set_word();
}"
compile_check_conftest "$CODE" "NV_VMA_FLAGS_SET_WORD_PRESENT" "" "functions"
;;
drm_driver_has_dumb_destroy)
@@ -4856,6 +4944,46 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;
register_shrinker_has_format_arg)
# Determine if register_shrinker() takes a printf-style 'fmt'
# argument that is used to name the shrinker.
#
# The format argument was added in v6.0, when shrinkers gained
# names; older kernels take only the 'struct shrinker *' argument.
#
CODE="
#include <linux/mm.h>
void conftest_register_shrinker_has_format_arg(void) {
register_shrinker(NULL, \"%d\", 0);
}"
compile_check_conftest "$CODE" "NV_REGISTER_SHRINKER_HAS_FMT_ARG" "" "types"
;;
shrinker_alloc)
# Determine if the shrinker_alloc() function is present.
#
# shrinker_alloc() is part of the dynamically allocated shrinker
# API added in v6.7, which supersedes register_shrinker().
#
CODE="
#include <linux/mm.h>
void conftest_shrinker_alloc(void) {
shrinker_alloc();
}"
compile_check_conftest "$CODE" "NV_SHRINKER_ALLOC_PRESENT" "" "functions"
;;
memory_device_coherent_present)
#
# Determine if MEMORY_DEVICE_COHERENT support is present or not

View File

@@ -2,6 +2,9 @@
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drm_hdcp.h \
drm/display/drm_hdcp.h \
drm/display/drm_hdcp_helper.h \
drm/drmP.h \
drm/drm_aperture.h \
drm/drm_atomic_state_helper.h \

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,8 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _UAPI_NVIDIA_DRM_IOCTL_H_
#define _UAPI_NVIDIA_DRM_IOCTL_H_
#ifndef _NV_DRM_COMMON_IOCTL_H_
#define _NV_DRM_COMMON_IOCTL_H_
#include <drm/drm.h>
@@ -396,4 +396,4 @@ struct drm_nvidia_get_drm_file_unique_id_params {
uint64_t id; /* OUT Unique ID of the DRM file */
};
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
#endif /* _NV_DRM_COMMON_IOCTL_H_ */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,8 +31,8 @@
#include "nvidia-drm-encoder.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-fb.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-format.h"
#include "nv_drm_common_ioctl.h"
#include "nvmisc.h"
#include "nv_common_utils.h"
@@ -1286,15 +1286,10 @@ plane_req_config_update(struct drm_plane *plane,
if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
nv_drm_format_is_yuv(plane_state->fb->format->format)) {
if (nv_plane->supportsColorProperties) {
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
} else {
req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
}
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
} else {
#endif
@@ -1559,7 +1554,7 @@ static int __nv_drm_cursor_atomic_check(struct drm_plane *plane,
WARN_ON(nv_plane->layer_idx != NVKMS_KAPI_LAYER_INVALID_IDX);
nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state);
struct NvKmsKapiHeadRequestedConfig *head_req_config =
&nv_crtc_state->req_config;
@@ -1605,7 +1600,7 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
WARN_ON(nv_plane->layer_idx == NVKMS_KAPI_LAYER_INVALID_IDX);
nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state);
struct NvKmsKapiHeadRequestedConfig *head_req_config =
&nv_crtc_state->req_config;
@@ -2435,7 +2430,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
req_config->flags.displaysChanged = NV_TRUE;
nv_drm_for_each_connector_in_state(crtc_state->state,
for_each_new_connector_in_state(crtc_state->state,
connector, connector_state, j) {
if (connector_state->crtc != crtc) {
continue;
@@ -2844,26 +2839,16 @@ nv_drm_plane_create(struct drm_device *dev,
}
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if (pResInfo->caps.supportsInputColorSpace &&
pResInfo->caps.supportsInputColorRange) {
nv_plane->supportsColorProperties = true;
drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
} else {
nv_plane->supportsColorProperties = false;
}
#else
nv_plane->supportsColorProperties = false;
drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
#endif
drm_plane_helper_add(plane, &nv_plane_helper_funcs);

View File

@@ -191,13 +191,6 @@ struct nv_drm_plane {
*/
uint32_t layer_idx;
/**
* @supportsColorProperties
*
* If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
*/
bool supportsColorProperties;
struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,7 +40,7 @@
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
@@ -326,8 +326,8 @@ done:
static int nv_drm_disp_cmp (const void *l, const void *r)
{
struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;
const struct nv_drm_mst_display_info *l_info = (const struct nv_drm_mst_display_info *)l;
const struct nv_drm_mst_display_info *r_info = (const struct nv_drm_mst_display_info *)r;
return strcmp(l_info->dpAddress, r_info->dpAddress);
}
@@ -743,6 +743,8 @@ static int nv_drm_dev_load(struct drm_device *dev)
nv_dev->hasVideoMemory = resInfo.caps.hasVideoMemory;
nv_dev->contiguousPhysicalMappings = resInfo.caps.contiguousPhysicalMappings;
nv_dev->genericPageKind = resInfo.caps.genericPageKind;
// Fermi-Volta use generation 0, Turing+ uses generation 2.
@@ -762,8 +764,6 @@ static int nv_drm_dev_load(struct drm_device *dev)
resInfo.caps.numDisplaySemaphores;
nv_dev->display_semaphores.next_index = 0;
nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;
nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;
@@ -1717,6 +1717,11 @@ static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return retcode;
}
static int nv_drm_load_noop(struct drm_device *dev, unsigned long flags)
{
return 0;
}
static const struct file_operations nv_drm_fops = {
.owner = THIS_MODULE,
@@ -1899,6 +1904,8 @@ static struct drm_driver nv_drm_driver = {
.gem_prime_res_obj = nv_drm_gem_prime_res_obj,
#endif
.load = nv_drm_load_noop,
.postclose = nv_drm_postclose,
.open = nv_drm_open,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,12 +25,12 @@
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-fb.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-format.h"
#include "nv_drm_common_ioctl.h"
#include <drm/drm_crtc_helper.h>

View File

@@ -29,10 +29,10 @@
#endif
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-fence.h"
#include "nvidia-dma-resv-helper.h"
#include "nv_drm_common_ioctl.h"
#include <linux/dma-fence.h>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,15 +35,22 @@
static const u32 nvkms_to_drm_format[] = {
/* RGB formats */
[NvKmsSurfaceMemoryFormatA1R5G5B5] = DRM_FORMAT_ARGB1555,
[NvKmsSurfaceMemoryFormatX1R5G5B5] = DRM_FORMAT_XRGB1555,
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
[NvKmsSurfaceMemoryFormatA1R5G5B5] = DRM_FORMAT_ARGB1555,
[NvKmsSurfaceMemoryFormatX1R5G5B5] = DRM_FORMAT_XRGB1555,
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
#if defined(DRM_FORMAT_ABGR16161616)
/*
* DRM_FORMAT_ABGR16161616 was introduced by Linux kernel commit
* ff92ecf575a92 (v5.14).
*/
[NvKmsSurfaceMemoryFormatR16G16B16A16] = DRM_FORMAT_ABGR16161616,
#endif
#if defined(DRM_FORMAT_ABGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
#endif
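As background for this table: a sparse designated-initializer array like nvkms_to_drm_format[] is typically consumed with a bounds check plus a zero test, since slots that were never initialized remain 0 (DRM_FORMAT_INVALID) and therefore mean "no DRM equivalent on this kernel". A minimal, hypothetical helper (the function name is illustrative, not taken from the driver):

    /* Illustrative only: look up the DRM fourcc for an NvKms format. */
    static uint32_t nv_drm_fourcc_from_nvkms_format(enum NvKmsSurfaceMemoryFormat format)
    {
        if (format >= ARRAY_SIZE(nvkms_to_drm_format)) {
            return 0; /* out of range: no mapping */
        }

        /* Uninitialized entries are 0 (DRM_FORMAT_INVALID): unsupported. */
        return nvkms_to_drm_format[format];
    }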

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,7 +33,7 @@
#include <drm/drm_drv.h>
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#include "linux/dma-buf.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,7 @@
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#include <drm/drm_drv.h>
#include <drm/drm_prime.h>
@@ -161,6 +161,21 @@ static int __nv_drm_gem_nvkms_map(
goto done;
}
/*
* XXX Physical mapping currently broken in cases where we can't guarantee
* that the mapping is contiguous. Fail on platforms that don't have
* guaranteed contiguous physical mappings.
*/
if (!nv_dev->contiguousPhysicalMappings) {
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Mapping vidmem NvKmsKapiMemory 0x%p is currently "
"unsupported on coherent GPU memory configurations",
pMemory);
ret = -ENOMEM;
goto done;
}
if (!nvKms->mapMemory(nv_dev->pDevice,
pMemory,
NVKMS_KAPI_MAPPING_TYPE_USER,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,7 +28,7 @@
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#include "linux/dma-buf.h"
#include "linux/mm.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,7 +25,6 @@
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-fence.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-gem-nvkms-memory.h"
@@ -34,6 +33,7 @@
#include "nvidia-drm-helper.h"
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nv_drm_common_ioctl.h"
#include <drm/drm_drv.h>
#include <drm/drm_prime.h>

View File

@@ -54,7 +54,7 @@
* drm_atomic_helper_disable_all() is copied from
* linux/drivers/gpu/drm/drm_atomic_helper.c and modified to use
* nv_drm_for_each_crtc instead of drm_for_each_crtc to loop over all crtcs,
* use nv_drm_for_each_*_in_state instead of for_each_connector_in_state to loop
* use for_each_new_*_in_state instead of for_each_connector_in_state to loop
* over all modeset object states, and use drm_atomic_state_free() if
* drm_atomic_state_put() is not available.
*
@@ -139,13 +139,13 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
plane_state->rotation = DRM_MODE_ROTATE_0;
}
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
for_each_new_connector_in_state(state, conn, conn_state, i) {
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
if (ret < 0)
goto free;
}
nv_drm_for_each_plane_in_state(state, plane, plane_state, i) {
for_each_new_plane_in_state(state, plane, plane_state, i) {
ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
if (ret < 0)
goto free;

View File

@@ -138,154 +138,6 @@ nv_drm_prime_pages_to_sg(struct drm_device *dev,
int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
struct drm_modeset_acquire_ctx *ctx);
/*
* for_each_connector_in_state(), for_each_crtc_in_state() and
* for_each_plane_in_state() were added by kernel commit
* df63b9994eaf942afcdb946d27a28661d7dfbf2a which was Signed-off-by:
* Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* for_each_connector_in_state(), for_each_crtc_in_state() and
* for_each_plane_in_state() were copied from
* include/drm/drm_atomic.h @
* 21a01abbe32a3cbeb903378a24e504bfd9fe0648
* which has the following copyright and license information:
*
* Copyright (C) 2014 Red Hat
* Copyright (C) 2014 Intel Corp.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Rob Clark <robdclark@gmail.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*/
/**
* nv_drm_for_each_connector_in_state - iterate over all connectors in an
* atomic update
* @__state: &struct drm_atomic_state pointer
* @connector: &struct drm_connector iteration cursor
* @connector_state: &struct drm_connector_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all connectors in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state(), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_connector_in_state)
#define nv_drm_for_each_connector_in_state(__state, \
connector, connector_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->num_connector && \
((connector) = (__state)->connectors[__i].ptr, \
(connector_state) = (__state)->connectors[__i].state, 1); \
(__i)++) \
for_each_if (connector)
#else
#define nv_drm_for_each_connector_in_state(__state, \
connector, connector_state, __i) \
for_each_connector_in_state(__state, connector, connector_state, __i)
#endif
/**
* nv_drm_for_each_crtc_in_state - iterate over all CRTCs in an atomic update
* @__state: &struct drm_atomic_state pointer
* @crtc: &struct drm_crtc iteration cursor
* @crtc_state: &struct drm_crtc_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all CRTCs in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state(), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_crtc_in_state)
#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->dev->mode_config.num_crtc && \
((crtc) = (__state)->crtcs[__i].ptr, \
(crtc_state) = (__state)->crtcs[__i].state, 1); \
(__i)++) \
for_each_if (crtc_state)
#else
#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \
for_each_crtc_in_state(__state, crtc, crtc_state, __i)
#endif
/**
* nv_drm_for_each_plane_in_state - iterate over all planes in an atomic update
* @__state: &struct drm_atomic_state pointer
* @plane: &struct drm_plane iteration cursor
* @plane_state: &struct drm_plane_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all planes in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state(), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_plane_in_state)
#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->dev->mode_config.num_total_plane && \
((plane) = (__state)->planes[__i].ptr, \
(plane_state) = (__state)->planes[__i].state, 1); \
(__i)++) \
for_each_if (plane_state)
#else
#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \
for_each_plane_in_state(__state, plane, plane_state, __i)
#endif
/*
* for_each_new_plane_in_state() was added by kernel commit
* 581e49fe6b411f407102a7f2377648849e0fa37f which was Signed-off-by:
* Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* This commit also added the old_state and new_state pointers to
* __drm_planes_state. Because of this, the best that can be done on kernel
* versions without this macro is for_each_plane_in_state.
*/
/**
* nv_drm_for_each_new_plane_in_state - iterate over all planes in an atomic update
* @__state: &struct drm_atomic_state pointer
* @plane: &struct drm_plane iteration cursor
* @new_plane_state: &struct drm_plane_state iteration cursor for the new state
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all planes in an atomic update, tracking only the new
* state. This is useful in enable functions, where we need the new state the
* hardware should be in when the atomic commit operation has completed.
*/
#if !defined(for_each_new_plane_in_state)
#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \
nv_drm_for_each_plane_in_state(__state, plane, new_plane_state, __i)
#else
#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \
for_each_new_plane_in_state(__state, plane, new_plane_state, __i)
#endif
#include <drm/drm_auth.h>
#include <drm/drm_file.h>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015, 2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -108,8 +108,11 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
return false;
}
/* Find out whether primary & overlay flip done events will be generated. */
nv_drm_for_each_plane_in_state(old_crtc_state->state,
/*
* Find out whether primary & overlay flip done events will be generated.
* Only called after drm_atomic_helper_swap_state, so we use old state.
*/
for_each_old_plane_in_state(old_crtc_state->state,
plane, old_plane_state, i) {
if (old_plane_state->crtc != crtc) {
continue;
@@ -193,7 +196,7 @@ static int __nv_drm_convert_in_fences(
return 0;
}
nv_drm_for_each_new_plane_in_state(state, plane, plane_state, i) {
for_each_new_plane_in_state(state, plane, plane_state, i) {
if ((plane->type == DRM_PLANE_TYPE_CURSOR) ||
(plane_state->crtc != crtc) ||
(plane_state->fence == NULL)) {
@@ -334,7 +337,8 @@ static int __nv_drm_get_syncpt_data(
head_reply_config = &reply_config->headReplyConfig[nv_crtc->head];
nv_drm_for_each_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) {
/* Use old state because this is only called after drm_atomic_helper_swap_state */
for_each_old_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) {
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
if (plane->type == DRM_PLANE_TYPE_CURSOR || old_plane_state->crtc != crtc) {
@@ -395,7 +399,7 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
&(to_nv_atomic_state(state)->config);
struct NvKmsKapiModeSetReplyConfig reply_config = { };
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
int i;
int ret;
@@ -429,18 +433,10 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
memset(requested_config, 0, sizeof(*requested_config));
/* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/*
* When committing a state, the new state is already stored in
* crtc->state. When checking a proposed state, the proposed state is
* stored in crtc_state.
*/
struct drm_crtc_state *new_crtc_state =
commit ? crtc->state : crtc_state;
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (commit) {
struct drm_crtc_state *old_crtc_state = crtc_state;
struct nv_drm_crtc_state *nv_new_crtc_state =
to_nv_crtc_state(new_crtc_state);
@@ -497,20 +493,17 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
}
if (commit && nv_dev->supportsSyncpts) {
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/* commit is true so we check old state */
for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
/*! loop over affected crtcs and get NvKmsKapiModeSetReplyConfig */
ret = __nv_drm_get_syncpt_data(
nv_dev, crtc, crtc_state, requested_config, &reply_config);
nv_dev, crtc, old_crtc_state, requested_config, &reply_config);
if (ret != 0) {
return ret;
}
}
}
if (commit && nv_dev->requiresVrrSemaphores && reply_config.vrrFlip) {
nvKms->signalVrrSemaphore(nv_dev->pDevice, reply_config.vrrSemaphoreIndex);
}
return 0;
}
@@ -523,12 +516,38 @@ int nv_drm_atomic_check(struct drm_device *dev,
struct drm_crtc_state *crtc_state;
int i;
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
struct drm_plane *plane;
struct drm_plane_state *plane_state;
int j;
bool cursor_surface_changed;
bool cursor_only_commit;
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
/*
* Committing cursor surface change without any other plane change can
* cause cursor surface in use by HW to be freed prematurely. Add all
* planes to the commit to avoid this. This is a workaround for bug 4966645.
*/
cursor_surface_changed = false;
cursor_only_commit = true;
for_each_new_plane_in_state(crtc_state->state, plane, plane_state, j) {
if (plane->type == DRM_PLANE_TYPE_CURSOR) {
if (plane_state->fb != plane->state->fb) {
cursor_surface_changed = true;
}
} else {
cursor_only_commit = false;
break;
}
}
/*
* if the color management changed on the crtc, we need to update the
* crtc's plane's CSC matrices, so add the crtc's planes to the commit
*/
if (crtc_state->color_mgmt_changed) {
if (crtc_state->color_mgmt_changed ||
(cursor_surface_changed && cursor_only_commit)) {
if ((ret = drm_atomic_add_affected_planes(state, crtc)) != 0) {
goto done;
}
@@ -619,7 +638,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,
* Our system already implements such a queue, but due to
* bug 4054608, it is currently not used.
*/
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
/*
@@ -726,7 +745,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,
goto done;
}
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
for_each_old_crtc_in_state(state, crtc, crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
struct nv_drm_crtc_state *nv_new_crtc_state =
to_nv_crtc_state(crtc->state);

View File

@@ -30,6 +30,7 @@
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_print.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
@@ -141,8 +142,9 @@ struct nv_drm_device {
NvBool hasVideoMemory;
NvBool contiguousPhysicalMappings;
NvBool supportsSyncpts;
NvBool requiresVrrSemaphores;
NvBool subOwnershipGranted;
NvBool hasFramebufferConsole;

View File

@@ -62,6 +62,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_property_event
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_status_event
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_flags_set
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
@@ -91,7 +94,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fenc
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -130,6 +130,15 @@ module_param_named(config_file, nvkms_conf, charp, 0400);
static atomic_t nvkms_alloc_called_count;
#define NV_SUPPORTS_PLATFORM_DEVICE_PUT NV_IS_EXPORT_SYMBOL_GPL_platform_device_put
#if defined(NV_LINUX_NVHOST_H_PRESENT) && NV_SUPPORTS_PLATFORM_DEVICE_PUT
#if defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) || defined(CONFIG_TEGRA_GRHOST)
#define NVKMS_NVHOST_SYNCPT_SUPPORTED
struct platform_device *nvhost_platform_device = NULL;
#endif
#endif
NvBool nvkms_test_fail_alloc_core_channel(
enum FailAllocCoreChannelMethod method
)
@@ -206,21 +215,18 @@ NvBool nvkms_kernel_supports_syncpts(void)
* support for syncpts; callers must also check that the hardware
* supports syncpts.
*/
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
return NV_TRUE;
#else
return NV_FALSE;
#endif
}
#define NVKMS_SYNCPT_STUBS_NEEDED
/*************************************************************************
* NVKMS interface for nvhost unit for sync point APIs.
*************************************************************************/
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
#undef NVKMS_SYNCPT_STUBS_NEEDED
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED) && defined(CONFIG_TEGRA_GRHOST)
#include <linux/nvhost.h>
@@ -228,17 +234,21 @@ NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct platform_device *pdev = nvhost_get_default_device();
if (nvhost_platform_device == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get default nvhost device");
return NV_FALSE;
}
switch (op) {
case NVKMS_SYNCPT_OP_ALLOC:
params->alloc.id = nvhost_get_syncpt_client_managed(
pdev, params->alloc.syncpt_name);
nvhost_platform_device, params->alloc.syncpt_name);
break;
case NVKMS_SYNCPT_OP_PUT:
nvhost_syncpt_put_ref_ext(pdev, params->put.id);
nvhost_syncpt_put_ref_ext(nvhost_platform_device, params->put.id);
break;
case NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH: {
@@ -272,7 +282,7 @@ NvBool nvkms_syncpt_op(
case NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD:
nvhost_syncpt_create_fence_single_ext(
pdev,
nvhost_platform_device,
params->id_and_thresh_to_fd.id,
params->id_and_thresh_to_fd.thresh,
"nvkms-fence",
@@ -281,7 +291,7 @@ NvBool nvkms_syncpt_op(
case NVKMS_SYNCPT_OP_READ_MINVAL:
params->read_minval.minval =
nvhost_syncpt_read_minval(pdev, params->read_minval.id);
nvhost_syncpt_read_minval(nvhost_platform_device, params->read_minval.id);
break;
}
@@ -289,7 +299,7 @@ NvBool nvkms_syncpt_op(
return NV_TRUE;
}
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) && defined(NV_LINUX_NVHOST_H_PRESENT)
#elif defined(NVKMS_NVHOST_SYNCPT_SUPPORTED) && defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
#include <linux/dma-fence.h>
#include <linux/file.h>
@@ -305,24 +315,20 @@ NvBool nvkms_syncpt_op(
#include <linux/nvhost.h>
#undef NVKMS_SYNCPT_STUBS_NEEDED
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct host1x_syncpt *host1x_sp;
struct platform_device *pdev;
struct host1x *host1x;
pdev = nvhost_get_default_device();
if (pdev == NULL) {
if (nvhost_platform_device == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get nvhost default pdev");
return NV_FALSE;
"Failed to get default nvhost device");
return NV_FALSE;
}
host1x = nvhost_get_host1x(pdev);
host1x = nvhost_get_host1x(nvhost_platform_device);
if (host1x == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get host1x");
@@ -436,9 +442,7 @@ NvBool nvkms_syncpt_op(
return NV_TRUE;
}
#endif
#ifdef NVKMS_SYNCPT_STUBS_NEEDED
#else
/* Unsupported STUB for nvkms_syncpt APIs */
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
@@ -2091,6 +2095,14 @@ static int __init nvkms_init(void)
atomic_set(&nvkms_alloc_called_count, 0);
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
/*
* nvhost_get_default_device() might return NULL; don't check it
* until we use it.
*/
nvhost_platform_device = nvhost_get_default_device();
#endif
ret = nvkms_alloc_rm();
if (ret != 0) {
@@ -2152,6 +2164,10 @@ static void __exit nvkms_exit(void)
struct nvkms_timer_t *timer, *tmp_timer;
unsigned long flags = 0;
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
platform_device_put(nvhost_platform_device);
#endif
nvkms_proc_exit();
down(&nvkms_lock);

View File

@@ -107,6 +107,7 @@ typedef struct {
enum FailAllocCoreChannelMethod {
FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
FAIL_ALLOC_CORE_CHANNEL_NO_CLASS = 2,
};
NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);

View File

@@ -100,4 +100,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_platform_device_put

View File

@@ -159,9 +159,6 @@ struct nvidia_p2p_page_table {
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
* This API does not support Coherent Driver-based Memory Management(CDMM) mode.
* CDMM allows coherent GPU memory to be managed by the driver and not the OS.
* This is done by the driver not onlining the memory as a NUMA node.
*
* This API may sleep.
*

View File

@@ -30,9 +30,11 @@
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
@@ -40,6 +42,7 @@
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
@@ -66,6 +69,7 @@
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
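For readers unfamiliar with these autogenerated class headers: a define whose value looks like 12:12 or 1:0 names the HIGH:LOW bit range of a field inside a 32-bit method payload, and the accompanying *_TARGET_* / *_TYPE_* defines are the values placed into that range. The macros below are an illustrative sketch of the mask/shift arithmetic only; they are not the helpers the driver itself uses.

    /* Sketch: turn a HIGH:LOW bit range into a mask and a shifted value. */
    #include <stdint.h>

    #define FIELD_MASK(hi, lo)      ((uint32_t)(((((uint64_t)1) << ((hi) - (lo) + 1)) - 1) << (lo)))
    #define FIELD_VALUE(hi, lo, v)  (((uint32_t)(v) << (lo)) & FIELD_MASK(hi, lo))

    /*
     * Example: a LAUNCH_DMA payload with a physical source address and a
     * virtual destination address, using the ranges and values defined
     * above (SRC_TYPE is bits 12:12 with PHYSICAL = 1; DST_TYPE is bits
     * 13:13 with VIRTUAL = 0).
     */
    static uint32_t build_launch_dma_payload(void)
    {
        uint32_t payload = 0;

        payload |= FIELD_VALUE(12, 12, 0x1); /* SRC_TYPE = PHYSICAL */
        payload |= FIELD_VALUE(13, 13, 0x0); /* DST_TYPE = VIRTUAL  */

        return payload;
    }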

View File

@@ -46,4 +46,8 @@
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B (0x0000000B)
/* valid ARCHITECTURE_GB20x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB206 (0x00000006)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB207 (0x00000007)
#endif /* _ctrl2080mc_h_ */

View File

@@ -1,152 +1,28 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __ga100_dev_runlist_h__
#define __ga100_dev_runlist_h__
/* This file is autogenerated. Do not edit */
#define NV_RUNLIST 0x000003ff:0x00000000 /* RW--D */
#define NV_CHRAM 0x00001fff:0x00000000 /* RW--D */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK 0x040 /* RW-4R */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 8 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE 0x044 /* RW-4R */
#define NV_RUNLIST_INT_CYA_SPARE__PRIV_LEVEL_MASK 0x040 /* */
#define NV_RUNLIST_INT_CYA_SPARE_DATA 31:0 /* RWIUF */
#define NV_RUNLIST_INT_CYA_SPARE_DATA_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE 0:0 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL 1:1 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_1MTHD 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_2MTHD 0x00000001 /* */
#define NV_RUNLIST_CONFIG 0x000 /* RW-4R */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH 0:0 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_WEAK 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_STRONG 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH 4:4 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_WEAK 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_STRONG 0x00000001 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT 9:8 /* RWIVF */
#define NV_RUNLIST_CONFIG_L2_EVICT_FIRST 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT_NORMAL 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_L2_EVICT_LAST 0x00000002 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4 10:10 /* RWXVF */
#define NV_RUNLIST_CONFIG_SUBCH4_INACTIVE 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4_ACTIVE 0x00000001 /* RW--V */
#define NV_RUNLIST_CHANNEL_CONFIG 0x004 /* R--4R */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2 3:0 /* C--UF */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2_2K 11 /* C---V */
#define NV_RUNLIST_CHANNEL_CONFIG_CHRAM_BAR0_OFFSET 31:4 /* R-XVF */
#define NV_RUNLIST_DOORBELL_CONFIG 0x008 /* R--4R */
#define NV_RUNLIST_DOORBELL_CONFIG_ID 31:16 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG 0x00C /* R--4R */
#define NV_RUNLIST_FB_CONFIG_FB_THREAD_ID 7:0 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG_ESC_ID 15:8 /* R-XVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG(i) (0x300+(i)*4) /* RW-4A */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG__SIZE_1 64 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK 11:0 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW 10:0 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW_INIT 2047 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET 27:16 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW 26:16 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW_INIT 0x0 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE 31:31 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_TRUE 1 /* RW--V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_FALSE 0 /* RWI-V */
#define NV_RUNLIST_PBDMA_CONFIG(i) (0x010+(i)*4) /* R--4A */
#define NV_RUNLIST_PBDMA_CONFIG__SIZE_1 2 /* */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_ID 7:0 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_BAR0_OFFSET 25:10 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID 31:31 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_ACQ_PRETEST 0x020 /* RW-4R */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT 7:0 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT_8 0x00000008 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_10 0x0000000a /* RW--V */
#define NV_RUNLIST_IDLE_FILTER 0x024 /* RW-4R */
#define NV_RUNLIST_IDLE_FILTER_PERIOD 7:0 /* RWIUF */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_INIT 0x00000050 /* RWI-V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD__PROD 0x00000064 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_8 0x00000008 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_32 0x00000020 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK 0x028 /* RW-4R */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER 7:0 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_SHORT 0x00000003 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_100US 0x00000064 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_SHORT 0x00000000 /* */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_100US 0x00000000 /* */
#define NV_RUNLIST_ESCHED_CONFIG 0x02c /* C--4R */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID 15:0 /* C--UF */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID_VALUE 50543 /* C---V */
#define NV_CHRAM_CHANNEL(i) (0x000+(i)*4) /* RW-4A */
#define NV_CHRAM_CHANNEL__SIZE_1 2048 /* */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL 0:0 /* -WIVF */
@@ -188,22 +64,6 @@
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL 12:12 /* RWIVF */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_STATUS 12:8 /* */
#define NV_CHRAM_CHANNEL_STATUS_IDLE 0x00000000 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING 0x00000001 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_CTX_RELOAD 0x00000003 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL 0x00000011 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x00000013 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY 0x00000004 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_AND_ENG_BUSY 0x0000000C /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY 0x00000008 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL 0x00000019 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING 0x00000009 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_CTX_RELOAD 0x00000006 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_ENG_BUSY_CTX_RELOAD 0x0000000E /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_CTX_RELOAD 0x0000000A /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_CTX_RELOAD 0x0000000B /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x0000001B /* */
#define NV_CHRAM_CHANNEL_UPDATE 31:0 /* */
#define NV_CHRAM_CHANNEL_UPDATE_ENABLE_CHANNEL 0x00000002 /* */
#define NV_CHRAM_CHANNEL_UPDATE_DISABLE_CHANNEL 0x00000003 /* */
@@ -211,461 +71,10 @@
#define NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED 0x00000011 /* */
#define NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED 0x00000021 /* */
#define NV_CHRAM_CHANNEL_UPDATE_CLEAR_CHANNEL 0xFFFFFFFF /* */
#define NV_RUNLIST_SUBMIT_BASE_LO 0x080 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO 31:12 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET 1:0 /* RWIVF */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_VID_MEM 0x0 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_COHERENT 0x2 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_NONCOHERENT 0x3 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_ALIGN_SHIFT 12 /* */
#define NV_RUNLIST_SUBMIT_BASE_HI 0x084 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI 7:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT 0x088 /* RW-4R */
#define NV_RUNLIST_SUBMIT_LENGTH 15:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_LENGTH_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_LENGTH_MAX 0x0000ffff /* RW--V */
#define NV_RUNLIST_SUBMIT_OFFSET 31:16 /* RWIVF */
#define NV_RUNLIST_SUBMIT_OFFSET_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_INFO 0x08C /* R--4R */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID 13:0 /* */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW 10:0 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID 14:14 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING 15:15 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET 31:16 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET_ZERO 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK(i) (0x190+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID(i) (0x160+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_VECTORID__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_VECTORID__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_VECTORID_VECTOR 11:0 /* RWXUF */
#define NV_RUNLIST_INTR_VECTORID_GSP 30:30 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_GSP_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_GSP_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID_CPU 31:31 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_CPU_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_CPU_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_RETRIGGER(i) (0x180+(i)*4) /* -W-4A */
#define NV_RUNLIST_INTR_RETRIGGER__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_RETRIGGER__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER 0:0 /* -W-VF */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER_TRUE 1 /* -W--V */
#define NV_RUNLIST_INTR_0 0x100 /* RW-4R */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE 5:5 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE 6:6 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_BAD_TSG_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_BAD_TSG_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_BAD_TSG_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0 16:16 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0 17:17 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1 18:18 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1 19:19 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_EVENT 9:9 /* */
#define NV_RUNLIST_INTR_0_MASK_SET 0x110 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR 0x118 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE(i) (0x120+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE(i) (0x140+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO(i) (0x224+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID 13:0 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE 15:14 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_LOAD 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SAVE 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SWITCH 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID 29:16 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS 31:30 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_AWAITING_ACK 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ENG_WAS_RESET 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ACK_RECEIVED 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_DROPPED_TIMEOUT 0x00000003 /* R---V */
#define NV_RUNLIST_INFO 0x108 /* R--4R */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM 0:0 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM 1:1 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM 4:4 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM 5:5 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE 8:8 /* R-IUF */
#define NV_RUNLIST_INFO_ENG_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE 9:9 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS 10:10 /* R-IVF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING 12:12 /* R-IUF */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED 13:13 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG 0x174 /* R--4R */
#define NV_RUNLIST_INTR_BAD_TSG_CODE 3:0 /* R-IVF */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_NO_ERROR 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_ZERO_LENGTH_TSG 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_MAX_LENGTH_EXCEEDED 0x00000002 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_RUNLIST_OVERFLOW 0x00000003 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_CHID_ENTRY 0x00000004 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_TSG_HEADER 0x00000005 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_INVALID_RUNQUEUE 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG(i) (0x220+(i)*64) /* RW-4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD 30:0 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_INIT 0x003fffff /* RWI-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_MAX 0x7fffffff /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION 31:31 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_ENABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG 0x050 /* RW-4R */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT 5:0 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT__PROD 0x00000002 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN 6:6 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STATE_CG_EN 7:7 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT 13:8 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT__PROD 0x00000002 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_EN 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN 15:15 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT 19:16 /* RWIVF */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT 23:20 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_INIT 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_FULLSPEED 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL 27:24 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN 28:28 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER 29:29 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_EN 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_DIS 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN 30:30 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN 31:31 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG1 0x054 /* RW-4R */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN 0:0 /* RWIVF */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG 16:1 /* */
#define NV_RUNLIST_BLKCG1_SLCG_ENABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG1_SLCG_DISABLED 0x0000FFFF /* */
#define NV_RUNLIST_BLKCG1_SLCG__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG1_SLCG_RLP 1:1 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH 3:3 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM 7:7 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB 8:8 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_LB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_LB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL 9:9 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP 10:10 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB 11:11 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI 13:13 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR 15:15 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_SLCG_MISC 0x05C /* RW-4R */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS 3:0 /* RWIVF */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTERNAL_DOORBELL 0x090 /* -W-4R */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID 11:0 /* */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID_HW 10:0 /* -WXUF */
#define NV_RUNLIST_INTERNAL_DOORBELL_GFID 21:16 /* -WXUF */
#define NV_RUNLIST_SCHED_DISABLE 0x094 /* RW-4R */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST 0:0 /* RWIVF */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_ENABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_DISABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_FALSE 0x00000000 /* */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_TRUE 0x00000001 /* */
#define NV_RUNLIST_PREEMPT 0x098 /* RW-4R */
#define NV_RUNLIST_PREEMPT_ID 11:0 /* */
#define NV_RUNLIST_PREEMPT_ID_HW 10:0 /* RWIUF */
@@ -679,104 +88,4 @@
#define NV_RUNLIST_PREEMPT_TYPE 25:24 /* RWIVF */
#define NV_RUNLIST_PREEMPT_TYPE_RUNLIST 0x00000000 /* RWI-V */
#define NV_RUNLIST_PREEMPT_TYPE_TSG 0x00000001 /* RW--V */
#define NV_RUNLIST_ENGINE_STATUS0(i) (0x200+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS0__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS 15:13 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_INVALID 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_VALID 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SAVE 0x00000005 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_LOAD 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SWITCH 0x00000007 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX 13:13 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_INVALID 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_VALID 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD 14:14 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW 15:15 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD 29:29 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1(i) (0x204+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS1__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS1_GFID 5:0 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID 13:8 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_INTR_ID 20:16 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL(i,j) (0x208+(i)*64+(j)*4) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_2 2 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID 15:15 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG(i) (0x228+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN 0:0 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_DISABLED 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_ENABLED 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS 8:8 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI 12:12 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS 16:16 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI 20:20 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_ENGINE_ID 29:24 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST(i) (0x210+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI(i) (0x214+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST(i) (0x218+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI(i) (0x21C+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#endif // __ga100_dev_runlist_h__
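//
// Editor's sketch, not part of the original header: the "high:low" field
// definitions above are normally consumed through DRF/HWVALUE-style helpers.
// The minimal stand-ins below are hypothetical and self-contained; only the
// register offsets and field positions come from the manual text above. The
// ?: trick turns a "25:24"-style token into its low and high bit positions.
//
#define SKETCH_FLD_LO(f)      (0 ? f)                 // "hi:lo" -> lo
#define SKETCH_FLD_HI(f)      (1 ? f)                 // "hi:lo" -> hi
#define SKETCH_FLD_MASK(f)    (0xFFFFFFFFU >> (31 - SKETCH_FLD_HI(f) + SKETCH_FLD_LO(f)))
#define SKETCH_FLD_NUM(f, v)  (((v) & SKETCH_FLD_MASK(f)) << SKETCH_FLD_LO(f))

// Hypothetical usage: request a TSG-scoped preempt for 'tsgid' on the runlist
// whose PRI window is mapped at 'runlist_base'.
static inline void sketch_runlist_preempt_tsg(volatile NvU8 *runlist_base, NvU32 tsgid)
{
    NvU32 preempt = SKETCH_FLD_NUM(NV_RUNLIST_PREEMPT_ID_HW, tsgid) |
                    SKETCH_FLD_NUM(NV_RUNLIST_PREEMPT_TYPE, NV_RUNLIST_PREEMPT_TYPE_TSG);

    *(volatile NvU32 *)(runlist_base + NV_RUNLIST_PREEMPT) = preempt;
}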

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -78,6 +78,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c

View File

@@ -61,6 +61,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlb
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += get_dev_pagemap_has_pgmap_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present

View File

@@ -2354,7 +2354,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
//
// When a page is in its preferred location, a fault from another processor will
// not cause a migration if a mapping for that page from that processor can be
// established without migrating the page.
// established without migrating the page. Individual faulting pages will still
// migrate to service immediate access needs, but prefetch operations will not
// pull additional pages away from their preferred location.
//
// If the specified processor is a GPU and the GPU is not a NUMA node and the
// input range is system-allocated pageable memory and the system supports
@@ -2951,619 +2953,6 @@ NV_STATUS UvmIs8Supported(NvU32 *is8Supported);
// Tools API
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// UvmDebugGetVersion
//
// Returns the version number of the UVM debug library
// See uvm_types.h for valid version numbers, e.g. UVM_DEBUG_V1
//
//------------------------------------------------------------------------------
unsigned UvmDebugVersion(void);
//------------------------------------------------------------------------------
// UvmDebugCreateSession
//
// Creates a handle for a debugging session.
//
// When the client initializes, it will pass in a process handle and get a
// session ID for itself. Subsequent calls to the UVM API will take in that
// session ID.
//
// There are security requirements to this call.
// One of the following must be true:
// 1. The session owner must be running as an elevated user
// 2. The session owner and target must belong to the same user and the
// session owner is at least as privileged as the target.
//
// For CUDA 6.0 we can create at most 64 sessions per debugger process.
//
// Arguments:
// pid: (INPUT)
// Process id for which the debugging session will be created
//
// session: (OUTPUT)
// Handle to the debugging session associated to that pid.
//
// Error codes:
// NV_ERR_PID_NOT_FOUND:
// pid is invalid/ not associated with UVM.
//
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Function fails the security check.
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// Attempt is made to allocate more than 64 sessions per process.
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugCreateSession(unsigned pid,
UvmDebugSession *session);
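// Editor's illustration, not part of the original header: a minimal sketch
// that attaches a debug session to a target pid and tears it down again with
// UvmDebugDestroySession() (declared just below). 'target_pid' is a
// placeholder supplied by the caller.
static NV_STATUS sketch_debug_session_example(unsigned target_pid)
{
    UvmDebugSession session;
    NV_STATUS status = UvmDebugCreateSession(target_pid, &session);

    if (status != NV_OK)
        return status;

    // ... use 'session' with the counter and event queue APIs below ...

    return UvmDebugDestroySession(session);
}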
//------------------------------------------------------------------------------
// UvmDebugDestroySession
//
// Destroys a debugging session.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session associated to that pid.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// session is invalid.
//
// NV_ERR_BUSY_RETRY:
// Debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugDestroySession(UvmDebugSession session);
//------------------------------------------------------------------------------
// UvmDebugCountersEnable
//
// Enables the counters following the user specified configuration.
//
// The user must fill a list with the configuration of the counters it needs to
// either enable or disable. It can only enable one counter per line.
//
// The structure (UvmCounterConfig) has several fields:
// - scope: Please see the UvmCounterScope enum (above), for details.
// - name: Name of the counter. Please check UvmCounterName for list.
// - gpuid: Identifies the GPU for which the counter will be enabled/disabled
// This parameter is ignored in AllGpu scopes.
// - state: A value of 0 will disable the counter, a value of 1 will enable
// the counter.
//
// Note: All counters are refcounted, which means that a counter will only be
// disabled when its refcount reaches zero.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// config: (INPUT)
// pointer to configuration list as per above.
//
// count: (INPUT)
// number of entries in the config list.
//
// Error codes:
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Function fails the security check
//
// RM_INVALID_ARGUMENT:
// debugging session is invalid or one of the counter lines is invalid.
// If the call returns this value, no action specified by the config list
// will have taken effect.
//
// NV_ERR_NOT_SUPPORTED:
// UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0
//
// NV_ERR_BUSY_RETRY:
// the debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
UvmCounterConfig *config,
unsigned count);
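// Editor's illustration, not part of the original header: enable a single
// counter using the fields described above. The member names follow the
// description ('scope', 'name', 'gpuid', 'state'); the NvProcessorUuid type
// assumed for 'gpuid' is a guess based on the other counter APIs, so treat
// this purely as a sketch.
static NV_STATUS sketch_enable_one_counter(UvmDebugSession session,
                                           UvmCounterScope scope,
                                           UvmCounterName name,
                                           NvProcessorUuid gpuid)
{
    UvmCounterConfig config;

    config.scope = scope;   // see the UvmCounterScope enum
    config.name  = name;    // see UvmCounterName for the list
    config.gpuid = gpuid;   // ignored in AllGpu scopes
    config.state = 1;       // 1 = enable, 0 = disable (counters are refcounted)

    // Exactly one counter per config entry; pass a longer array to toggle more.
    return UvmDebugCountersEnable(session, &config, 1);
}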
//------------------------------------------------------------------------------
// UvmDebugGetCounterHandle
//
// Returns a handle to a particular counter. This is an opaque handle that the
// implementation uses to find your counter later. This handle can be
// used in subsequent calls to UvmDebugGetCounterVal().
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// scope: (INPUT)
// Scope that will be mapped.
//
// counterName: (INPUT)
// Name of the counter in that scope.
//
// gpu: (INPUT)
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
// This parameter is ignored in AllGpu scopes.
//
// pCounterHandle: (OUTPUT)
// Handle to the counter address.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// Specified scope/gpu pair or session id is invalid
//
// NV_ERR_NOT_SUPPORTED:
// UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0
//
// NV_ERR_BUSY_RETRY:
// debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugGetCounterHandle(UvmDebugSession session,
UvmCounterScope scope,
UvmCounterName counterName,
NvProcessorUuid gpu,
NvUPtr *pCounterHandle);
//------------------------------------------------------------------------------
// UvmDebugGetCounterVal
//
// Returns the counter value specified by the counter name.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// counterHandleArray: (INPUT)
// Array of counter handles
//
// handleCount: (INPUT)
// Number of handles in the counterHandleArray array.
//
// counterValArray: (OUTPUT)
// Array of counter values corresponding to the handles.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// one of the specified handles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
NvUPtr *counterHandleArray,
unsigned handleCount,
unsigned long long *counterValArray);
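// Editor's illustration, not part of the original header: look up a counter
// handle and read its current value in a single-element batch. Argument types
// mirror the two declarations above.
static NV_STATUS sketch_read_counter(UvmDebugSession session,
                                     UvmCounterScope scope,
                                     UvmCounterName name,
                                     NvProcessorUuid gpu,
                                     unsigned long long *value)
{
    NvUPtr handle;
    NV_STATUS status = UvmDebugGetCounterHandle(session, scope, name, gpu, &handle);

    if (status != NV_OK)
        return status;

    // Larger arrays of handles read several counters in one call.
    return UvmDebugGetCounterVal(session, &handle, 1, value);
}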
//------------------------------------------------------------------------------
// UvmEventQueueCreate
//
// This call creates an event queue of the given size.
// No events are added in the queue until they are enabled by the user.
// Event queue data is visible to the user even after the target process dies
// if the session is active and queue is not freed.
//
// User doesn't need to serialize multiple UvmEventQueueCreate calls as
// each call creates a new queue state associated with the returned queue
// handle.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (OUTPUT)
// Handle to created queue.
//
// queueSize: (INPUT)
// Size of the event queue buffer in units of UvmEventEntry's.
// This quantity must be > 1.
//
// notificationCount: (INPUT)
// Number of entries after which the user should be notified that
// there are events to fetch.
// User is notified when queueEntries >= notification count.
//
// Error codes:
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// it's not possible to allocate a queue of requested size.
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
// NV_ERR_PID_NOT_FOUND:
// queue create call is made on a session after the target dies.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
UvmEventQueueHandle *queueHandle,
NvS64 queueSize,
NvU64 notificationCount,
UvmEventTimeStampType timeStampType);
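// Editor's illustration, not part of the original header: create a queue that
// notifies the user once 64 entries are pending. The queue size and
// notification count are arbitrary placeholder values.
static NV_STATUS sketch_create_event_queue(UvmDebugSession session,
                                           UvmEventQueueHandle *queue,
                                           UvmEventTimeStampType timeStampType)
{
    // Queue size is in units of UvmEventEntry and must be > 1.
    return UvmEventQueueCreate(session, queue, 256, 64, timeStampType);
}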
//------------------------------------------------------------------------------
// UvmEventQueueDestroy
//
// This call frees all internal resources associated with the queue, including
// unpinning of the memory associated with that queue. Freeing the user buffer
// is the responsibility of the caller. The event queue might also be destroyed
// as a side effect of destroying a session associated with this queue.
//
// User needs to ensure that a queue handle is not deleted while some other
// thread is using the same queue handle.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// Handle to the queue which is to be freed
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueDestroy(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle);
//------------------------------------------------------------------------------
// UvmEventEnable
//
// This call enables a particular event type in the event queue.
// All events are disabled by default when a queue is created.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// Handle to the queue where events are to be enabled
//
// eventTypeFlags: (INPUT)
// This field specifies the event types to be enabled. For example:
// To enable migration events and memory violations: pass flags
// "UVM_EVENT_ENABLE_MEMORY_VIOLATION |UVM_EVENT_ENABLE_MIGRATION"
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_PID_NOT_FOUND:
// this call is made after the target process dies
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventEnable(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
unsigned eventTypeFlags);
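// Editor's illustration, not part of the original header: enable migration
// and memory violation events on a queue, using the flag names quoted in the
// comment above.
static NV_STATUS sketch_enable_migration_events(UvmDebugSession session,
                                                UvmEventQueueHandle queue)
{
    return UvmEventEnable(session,
                          queue,
                          UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION);
}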
//------------------------------------------------------------------------------
// UvmEventDisable
//
// This call disables a particular event type in the queue.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// Handle to the queue where events are to be disabled
//
// eventTypeFlags: (INPUT)
// This field specifies the event types to be disabled.
// For example: To disable migration events and memory violations:
// pass "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
// as flags
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_PID_NOT_FOUND:
// this call is made after the target process dies
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventDisable(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
unsigned eventTypeFlags);
//------------------------------------------------------------------------------
// UvmEventWaitOnQueueHandles
//
// User is notified when queueEntries >= notification count.
// This call does a blocking wait for this notification. It returns when
// at least one of the queue handles has events to be fetched or if it times out.
//
// This API accesses constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
// queueHandles: (INPUT)
// array of queue handles.
//
// arraySize: (INPUT)
// number of handles in array.
//
// timeout: (INPUT)
// timeout in msec
//
// pNotificationFlags: (OUTPUT)
// If a particular queue handle in the input array is notified then
// the respective bit flag is set in pNotificationFlags.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// one of the queueHandles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventWaitOnQueueHandles(UvmEventQueueHandle *queueHandleArray,
unsigned arraySize,
NvU64 timeout,
unsigned *pNotificationFlags);
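// Editor's illustration, not part of the original header: block for up to one
// second on a single queue. Bit 0 of the notification flags corresponds to
// index 0 of the handle array, per the description above.
static NV_STATUS sketch_wait_for_events(UvmEventQueueHandle queue, NvBool *notified)
{
    unsigned flags = 0;
    NV_STATUS status = UvmEventWaitOnQueueHandles(&queue, 1, 1000, &flags); // 1000 ms timeout

    *notified = (flags & 0x1) ? NV_TRUE : NV_FALSE;
    return status;
}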
//------------------------------------------------------------------------------
// UvmEventGetNotificationHandles
//
// User is notified when queueEntries >= notification count.
// The user can directly get the queue notification handles rather than using
// a UVM API to wait on queue handles. This helps the user to wait on other
// objects (apart from queue notification) along with queue notification
// handles in the same thread. The user can safely use this call along with the
// library supported wait call UvmEventWaitOnQueueHandles.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
// queueHandles: (INPUT)
// array of queue handles.
//
// arraySize: (INPUT)
// number of handles in array.
//
// notificationHandles: (OUTPUT)
// Windows: Output of this call contains an array of 'windows event
// handles' corresponding to the queue handles passed as input.
// Linux: All queues belonging to the same process share the same
// file descriptor(fd) for notification. If the user chooses to use
// UvmEventGetNotificationHandles then he should check all queues
// for new events (by calling UvmEventFetch) when notified on
// the fd.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
unsigned arraySize,
void **notificationHandleArray);
//------------------------------------------------------------------------------
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding physical gpu UUID in the
// gpuUuidTable. Using indices saves on the size of each event entry. This API
// provides the gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
// queue state.
//
// Arguments:
// gpuUuidTable: (OUTPUT)
// The return value is an array of physical GPU UUIDs. The array index
// is the corresponding gpuIndex. There can be at most 32 GPUs
// associated with UVM, so the array size is 32.
//
// validCount: (OUTPUT)
// The system doesn't normally contain 32 GPUs. This field gives the
// count of entries that are valid in the returned gpuUuidTable.
//
// Error codes:
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
unsigned *validCount);
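// Editor's illustration, not part of the original header: resolve the gpuIndex
// carried by migration event entries to a physical GPU UUID. The 32-entry
// bound comes from the comment above.
static NV_STATUS sketch_lookup_gpu_uuid(unsigned gpuIndex, NvProcessorUuid *uuid)
{
    NvProcessorUuid table[32];
    unsigned validCount = 0;
    NV_STATUS status = UvmEventGetGpuUuidTable(table, &validCount);

    if (status != NV_OK)
        return status;

    if (gpuIndex >= validCount)
        return NV_ERR_INVALID_ARGUMENT;

    *uuid = table[gpuIndex];
    return NV_OK;
}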
//------------------------------------------------------------------------------
// UvmEventFetch
//
// This call is used to fetch the queue entries in a user buffer.
//
// This API updates the queue state. Hence simultaneous calls to fetch/skip
// events should be avoided as that might corrupt the queue state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// queue from where to fetch the events.
//
// pBuffer: (OUTPUT)
// Pointer to the buffer where the API will copy the events. The user
// shall ensure the buffer is large enough.
//
// nEntries: (INPUT/OUTPUT)
// It provides the maximum number of entries that will be fetched
// from the queue. If this number is larger than the size of the
// queue it will be internally capped to that value.
// As output it returns the actual number of entries copied to the
// buffer.
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_INVALID_INDEX:
// The indices of the queue have been corrupted.
//
// NV_ERR_BUFFER_TOO_SMALL:
// The event queue buffer provided by the caller was too small to
// contain all of the events that occurred during this run.
// Events were therefore dropped (not recorded).
// Please re-run with a larger buffer.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventEntry *pBuffer,
NvU64 *nEntries);
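// Editor's illustration, not part of the original header: drain up to
// 'maxEntries' events into a caller-provided buffer. Fetch and skip calls
// must not run concurrently on the same queue, as noted above.
static NV_STATUS sketch_fetch_events(UvmDebugSession session,
                                     UvmEventQueueHandle queue,
                                     UvmEventEntry *buffer,
                                     NvU64 maxEntries,
                                     NvU64 *fetched)
{
    NvU64 nEntries = maxEntries;    // in: buffer capacity, out: entries copied
    NV_STATUS status = UvmEventFetch(session, queue, buffer, &nEntries);

    *fetched = (status == NV_OK) ? nEntries : 0;
    return status;
}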
//------------------------------------------------------------------------------
// UvmEventSkipAll
//
// This API drops all event entries from the queue.
//
// This API updates the queue state. Hence simultaneous calls to fetch/
// skip events should be avoided as that might corrupt the queue state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// target queue.
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventSkipAll(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle);
//------------------------------------------------------------------------------
// UvmEventQueryTimeStampType
//
// This API returns the type of time stamp used in an event entry for a given
// queue.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// target queue.
//
// timeStampType: (OUTPUT)
// type of time stamp used in the event entry. See UvmEventTimeStampType
// for supported types of time stamps.
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueryTimeStampType(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventTimeStampType *timeStampType);
//------------------------------------------------------------------------------
// UvmDebugAccessMemory
//
// This call can be used by the debugger to read/write a memory range. The UVM
// driver may not be aware of all the pages in this range. A bit per page is set
// by the driver if it is read/written by UVM.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// baseAddress: (INPUT)
// base address from where memory is to be accessed
//
// sizeInBytes: (INPUT)
// Number of bytes to be accessed
//
// accessType: (INPUT)
// Read or write access request
//
// buffer: (INPUT/OUTPUT)
// This buffer would be read or written to by the driver.
// User needs to allocate a big enough buffer to fit sizeInBytes.
//
// isBitmaskSet: (INPUT/OUTPUT)
// Set to 1 if any bit in the bitmask is set
// NULL(INPUT) if unused
//
// bitmask: (INPUT/OUTPUT)
// One bit per page is set if UVM reads or writes to it.
// User should allocate a bitmask big enough to fit one bit per page
// covered by baseAddress + sizeInBytes:
// (baseAlignmentBytes + sizeInBytes + pageSize - 1)/pageSize number
// of bits.
// NULL(IN) if unused.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugAccessMemory(UvmDebugSession session,
void *baseAddress,
NvU64 sizeInBytes,
UvmDebugAccessType accessType,
void *buffer,
NvBool *isBitmaskSet,
NvU64 *bitmask);
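// Editor's illustration, not part of the original header: read a range of
// target memory. The UvmDebugAccessTypeRead enumerator name is an assumption
// of this sketch (only the UvmDebugAccessType type appears above), and the
// caller sizes 'bitmask' with the formula quoted in the comment.
static NV_STATUS sketch_read_target_memory(UvmDebugSession session,
                                           void *baseAddress,
                                           NvU64 sizeInBytes,
                                           void *buffer,
                                           NvU64 *bitmask)
{
    NvBool isBitmaskSet = NV_FALSE;

    return UvmDebugAccessMemory(session,
                                baseAddress,
                                sizeInBytes,
                                UvmDebugAccessTypeRead,  // assumed enumerator
                                buffer,
                                &isBitmaskSet,
                                bitmask);
}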
//
// Uvm Tools uvm API
//
//------------------------------------------------------------------------------
// UvmToolsCreateSession
//

View File

@@ -78,6 +78,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -96,5 +100,7 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -82,6 +82,8 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -94,16 +96,22 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->sparse_mappings_supported = true;
parent_gpu->access_bits_supported = false;
UVM_ASSERT(parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100);
if (parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 ||
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000)
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000) {
parent_gpu->map_remap_larger_page_promotion = true;
else
}
else {
parent_gpu->map_remap_larger_page_promotion = false;
}
parent_gpu->plc_supported = true;
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -73,6 +73,7 @@ static NvU32 ce_aperture(uvm_aperture_t aperture)
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -111,8 +111,6 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
uvm_spin_loop_t spin;
NvU32 channel_faulted_mask = 0;
NvU32 clear_type_value = 0;
NvU32 doorbell_value = 0;
volatile NvU32 *doorbell_ptr;
UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);
@@ -129,12 +127,6 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
}
doorbell_ptr = (NvU32 *)((NvU8 *)user_channel->runlist_pri_base_register + NV_RUNLIST_INTERNAL_DOORBELL);
// GFID is not required since we clear faulted channel with a SW method on
// SRIOV. On baremetal, GFID is always zero.
doorbell_value = HWVALUE(_RUNLIST, INTERNAL_DOORBELL, CHID, user_channel->hw_channel_id);
// Wait for the channel to have the FAULTED bit set as this can race with
// interrupt notification
UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
@@ -143,7 +135,7 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
wmb();
UVM_GPU_WRITE_ONCE(*doorbell_ptr, doorbell_value);
UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
}
static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)

View File

@@ -601,7 +601,12 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_zero(faults_serviced_mask);
uvm_page_mask_zero(reads_serviced_mask);
if (!(vma->vm_flags & VM_READ))
// If the VMA doesn't have read or write permissions then all faults are
// fatal so we exit early.
// TODO: Bug 5451843: This fix brings to light potential issues in the ATS
// fault handling path as described in the bug. Those need to be handled
// to avoid any potential permission issues.
if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
return NV_OK;
if (!(vma->vm_flags & VM_WRITE)) {

View File

@@ -26,6 +26,7 @@
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_blackwell_fault_buffer.h"
#include "ctrl2080mc.h"
void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
@@ -81,6 +82,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = parent_gpu->rm_info.accessCntrBufferCount == 2;
// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
// Before this override, accessCntrBufferCount has only been used to
// determine the support for access counters in uvm_gpu.c and the statement
// above. After the HAL init, it is used for buffer allocations, and must
// not change its value.
if (parent_gpu->rm_info.accessCntrBufferCount > 1)
parent_gpu->rm_info.accessCntrBufferCount = 1;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -99,6 +110,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = true;
parent_gpu->ats.gmmu_pt_depth0_init_required = parent_gpu->ats.non_pasid_ats_enabled;
parent_gpu->access_bits_supported = false;
// Blackwell has a physical translation prefetcher, meaning SW must assume
// that any physical ATS translation can be fetched at any time. The
// specific behavior and impact differs with non-PASID ATS support, but
@@ -142,16 +157,26 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
parent_gpu->rm_info.gpuImplementation ==
NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B) {
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B) {
parent_gpu->is_integrated_gpu = true;
parent_gpu->access_bits_supported = false;
// GB10B has sticky L2 coherent cache lines.
// For details, refer to the comments in uvm_gpu.h
// where this field is declared.
parent_gpu->sticky_l2_coherent_cache_lines = true;
}
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
parent_gpu->rm_info.gpuImplementation ==
NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B) {
parent_gpu->is_integrated_gpu = true;
parent_gpu->access_bits_supported = false;
}
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
(parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB206 ||
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB207)) {
// TODO: Bug 3186788 : As reported in Bug 5309034, GB206
// and GB207 experience a GSP crash with VAB. Depending
// on whether RM fixes it or marks it as cannot fix, the
// below checks can be removed or retained.
parent_gpu->access_bits_supported = false;
}
}

View File

@@ -0,0 +1,77 @@
/*******************************************************************************
Copyright (c) 2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_push.h"
#include "uvm_mem.h"
#include "uvm_conf_computing.h"
bool uvm_hal_blackwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_gpu_address_is_peer(gpu, src)) {
UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
return false;
}
if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
src.address,
dst.address);
return false;
}
if (g_uvm_global.conf_computing_enabled) {
// Blackwell+ GPUs support secure P2P. In that case, memcopy using
// physical addresses is valid.
if (!uvm_aperture_is_peer(dst.aperture)) {
// In Confidential Computing, if a non-p2p memcopy uses physical
// addressing for either the destination or the source, then the
// corresponding aperture must be vidmem. If virtual addressing
// is used, and the backing storage is sysmem the access is only
// legal if the copy type is NONPROT2NONPROT, but the validation
// does not detect it.
if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
return false;
if (!dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
return false;
}
// The source and destination must be both unprotected, for sysmem
// copy, or both protected for p2p copy.
if (dst.is_unprotected != src.is_unprotected)
return false;
}
if (!gpu->parent->ce_phys_vidmem_write_supported && !dst.is_virtual && dst.aperture == UVM_APERTURE_VID) {
UVM_ERR_PRINT("Destination address of vidmem memcopy must be virtual, not physical: {%s, 0x%llx}\n",
uvm_gpu_address_aperture_string(dst),
dst.address);
return false;
}
return true;
}
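
The two Confidential Computing checks above (physical non-peer apertures must be vidmem, and the protection bits of source and destination must match) can be restated as a small standalone predicate. The sketch below is illustrative only; the struct and function names are not UVM types.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative restatement of the Confidential Computing rules above;
 * this struct is not a UVM type, just enough state for the checks. */
struct addr {
    bool is_virtual;
    bool is_vidmem;        /* physical aperture is vidmem */
    bool is_peer;          /* destination aperture is a peer aperture */
    bool is_unprotected;
};

static bool cc_memcopy_ok(struct addr dst, struct addr src)
{
    if (!dst.is_peer) {
        /* Physical addressing outside of p2p must target vidmem. */
        if (!src.is_virtual && !src.is_vidmem)
            return false;
        if (!dst.is_virtual && !dst.is_vidmem)
            return false;
    }
    /* Both ends unprotected (sysmem copy) or both protected (p2p copy). */
    return dst.is_unprotected == src.is_unprotected;
}

int main(void)
{
    struct addr vid = { .is_virtual = false, .is_vidmem = true };
    struct addr sys_phys = { .is_virtual = false, .is_vidmem = false };
    printf("%d %d\n", cc_memcopy_ok(vid, vid), cc_memcopy_ok(vid, sys_phys));
    return 0;
}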

View File

@@ -352,9 +352,8 @@ void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t apert
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 aperture_value;
if (!gpu->parent->is_integrated_gpu) {
if (!gpu->parent->is_integrated_gpu)
return uvm_hal_ampere_host_l2_invalidate(push, aperture);
}
switch (aperture) {
case UVM_APERTURE_SYS:
@@ -369,9 +368,9 @@ void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t apert
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
// Flush dirty
NV_PUSH_4U(C96F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
// Invalidate
NV_PUSH_4U(C96F, MEM_OP_A, 0,
MEM_OP_B, 0,

View File

@@ -3140,13 +3140,25 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
UVM_CHANNEL_TYPE_MEMOPS,
UVM_CHANNEL_TYPE_WLC };
static const uvm_channel_type_t types_p2p[] = { UVM_CHANNEL_TYPE_CPU_TO_GPU,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
UVM_CHANNEL_TYPE_GPU_TO_GPU,
UVM_CHANNEL_TYPE_MEMOPS,
UVM_CHANNEL_TYPE_WLC };
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
if (gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED) {
pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
// Direct transfers between GPUs are disallowed in Confidential Computing,
// but the preferred CE is still set to an arbitrary value for consistency.
preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
// If direct transfers between GPUs are disallowed, the preferred
// CE is still set to an arbitrary value for consistency.
preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
}
else {
pick_ces_for_channel_types(manager, ce_caps, types_p2p, ARRAY_SIZE(types_p2p), preferred_ce);
}
best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];

View File

@@ -37,6 +37,16 @@
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU 1024
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64
// It is unsafe to destroy the GPU's channel manager of an active uvm_gpu_t
// object. We sync trackers to avoid having any of the GPU's channels in any
// trackers. We can only guarantee that because in these tests, we only allow
// a single reference to the GPU.
static void channel_manager_destroy(uvm_gpu_t *gpu)
{
uvm_parent_gpu_sync_trackers(gpu->parent);
uvm_channel_manager_destroy(gpu->channel_manager);
}
// Schedule pushes one after another on all GPUs and channel types that copy and
// increment a counter into an adjacent memory location in a buffer. And then
// verify that all the values are correct on the CPU.
@@ -177,7 +187,7 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
uvm_channel_update_progress_all(channel);
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
// Destruction will hit the error again, so clear one more time.
uvm_global_reset_fatal_error();
@@ -306,7 +316,7 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
test_status = uvm_test_rc_for_gpu(gpu);
g_uvm_global.disable_fatal_error_assert = false;
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
TEST_NV_CHECK_RET(test_status);
@@ -355,7 +365,10 @@ static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset to IOMMU mapped sysmem");
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&push,
"Test memset to IOMMU mapped sysmem");
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);
@@ -497,14 +510,15 @@ static NV_STATUS test_iommu(uvm_va_space_t *va_space)
NV_STATUS test_status, create_status;
// The GPU channel manager is destroyed and then re-created after
// testing ATS RC fault, so this test requires exclusive access to the GPU.
// testing ATS RC fault, so this test requires exclusive access to the
// GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
g_uvm_global.disable_fatal_error_assert = true;
test_status = uvm_test_iommu_rc_for_gpu(gpu);
g_uvm_global.disable_fatal_error_assert = false;
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
TEST_NV_CHECK_RET(test_status);
@@ -989,7 +1003,9 @@ static NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
uvm_conf_computing_dma_buffer_t *cipher_text;
void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;
uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) : channel;
uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ?
uvm_channel_lcic_get_paired_wlc(channel) :
channel;
plain_cpu_va = &status;
data_size = sizeof(status);
@@ -1037,8 +1053,8 @@ release:
if (status != NV_OK)
return status;
// All channels except SEC2 used at least a single IV to release tracking.
// SEC2 doesn't support decrypt direction.
// All channels except SEC2 used at least a single IV to release
// tracking. SEC2 doesn't support decrypt direction.
if (uvm_channel_is_sec2(channel))
TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
else
@@ -1557,7 +1573,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
gpu->uvm_test_force_upper_pushbuffer_segment = 1;
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
gpu->uvm_test_force_upper_pushbuffer_segment = 0;

View File

@@ -157,7 +157,7 @@ void on_uvm_assert(void);
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \
do { \
UVM_IGNORE_EXPR(expr); \
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
no_printk(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2019 NVIDIA Corporation
Copyright (c) 2019-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,8 +27,6 @@
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
extern int uvm_enable_debug_procfs;
extern unsigned uvm_perf_map_remote_on_native_atomics_fault;
extern uvm_global_t g_uvm_global;

View File

@@ -98,6 +98,4 @@ typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_b
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
#endif //__UVM_FORWARD_DECL_H__

View File

@@ -146,12 +146,20 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
// Add the physical offset for peer mappings
if (uvm_aperture_is_peer(aperture)) {
if (uvm_parent_gpus_are_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
if (uvm_parent_gpus_are_nvlink_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
phys_offset += memory_owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
else if (uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
// Add DMA offset for bar1 p2p.
if (uvm_aperture_is_sys(aperture) && !memory_info->sysmem) {
uvm_gpu_phys_address_t phys_address = uvm_gpu_peer_phys_address(memory_owning_gpu, memory_info->physAddr, memory_mapping_gpu);
UVM_ASSERT(uvm_aperture_is_sys(phys_address.aperture));
phys_offset += (phys_address.address - memory_info->physAddr);
}
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {
pte = hal->make_pte(aperture,
@@ -159,6 +167,16 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
prot,
pte_flags);
if (pte != ext_mapping_info->pteBuffer[index * skip]) {
UVM_ERR_PRINT("PTE mismatch for %s->%s at %d (aperture: %s) %llx vs. %llx (address: %llx)\n",
uvm_parent_gpu_name(memory_mapping_gpu->parent),
uvm_parent_gpu_name(memory_owning_gpu->parent),
index,
uvm_aperture_string(aperture),
pte,
ext_mapping_info->pteBuffer[index * skip],
memory_info->physAddr);
}
TEST_CHECK_RET(pte == ext_mapping_info->pteBuffer[index * skip]);
phys_offset += page_size;

View File

@@ -42,24 +42,21 @@
#include "nv_uvm_interface.h"
uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static struct UvmEventsLinux g_exported_uvm_events;
static bool g_ops_registered = false;
static NV_STATUS uvm_register_callbacks(void)
{
NV_STATUS status = NV_OK;
g_exported_uvm_ops.suspend = uvm_suspend_entry;
g_exported_uvm_ops.resume = uvm_resume_entry;
g_exported_uvm_ops.startDevice = NULL;
g_exported_uvm_ops.stopDevice = NULL;
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
g_exported_uvm_events.isrTopHalf = uvm_isr_top_half_entry;
g_exported_uvm_events.suspend = uvm_suspend_entry;
g_exported_uvm_events.resume = uvm_resume_entry;
g_exported_uvm_events.drainP2P = uvm_suspend_and_drainP2P_entry;
g_exported_uvm_events.resumeP2P = uvm_resumeP2P_entry;
// Register the UVM callbacks with the main GPU driver:
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmEvents(&g_exported_uvm_events));
if (status != NV_OK)
return status;
@@ -71,7 +68,7 @@ static NV_STATUS uvm_register_callbacks(void)
static void uvm_unregister_callbacks(void)
{
if (g_ops_registered) {
uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmEvents());
g_ops_registered = false;
}
}

View File

@@ -408,14 +408,14 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
uvm_gpu_t *gpu = NULL;
uvm_gpu_id_t gpu_id;
NvU32 sub_processor_index;
NvU32 cur_sub_processor_index;
NvU32 start_search_index;
UVM_ASSERT(parent_gpu);
gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;
start_search_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) + 1 : 0;
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, start_search_index);
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);

View File

@@ -44,6 +44,7 @@
#include "uvm_conf_computing.h"
#include "uvm_linux.h"
#include "uvm_mmu.h"
#include "uvm_kvmalloc.h"
#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
@@ -67,6 +68,8 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
switch (link_type) {
case UVM_LINK_TYPE_PCIE:
return UVM_GPU_LINK_PCIE;
case UVM_LINK_TYPE_PCIE_BAR1:
return UVM_GPU_LINK_PCIE_BAR1;
case UVM_LINK_TYPE_NVLINK_1:
return UVM_GPU_LINK_NVLINK_1;
case UVM_LINK_TYPE_NVLINK_2:
@@ -107,18 +110,18 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
}
parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
parent_gpu->peer_address_info.is_direct_connected = gpu_info->nvlDirectConnect;
parent_gpu->peer_address_info.is_nvlink_direct_connected = gpu_info->nvlDirectConnect;
// nvswitch is routed via physical pages, where the upper 13-bits of the
// 47-bit address space holds the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
if (parent_gpu->peer_address_info.is_nvlink_direct_connected) {
parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
}
else if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
// nvswitch is routed via physical pages, where the upper 13-bits of the
// 47-bit address space holds the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
}
else if (parent_gpu->peer_address_info.is_direct_connected) {
parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
}
parent_gpu->ats.non_pasid_ats_enabled = gpu_info->nonPasidAtsSupport;
@@ -533,11 +536,12 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 8);
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 9);
switch (link_type) {
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_PCIE);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_PCIE_BAR1);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
@@ -666,14 +670,14 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer[i].max_notifications);
gpu->parent->access_counters.buffer[i].max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer[i].cached_get);
gpu->parent->access_counters.buffer[i].cached_get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer[i].cached_put);
gpu->parent->access_counters.buffer[i].cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counters.buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counters.buffer[i].rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
@@ -766,10 +770,10 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
UVM_ASSERT(uvm_procfs_is_debug_enabled());
// procfs_files are created before gpu_init_isr, we need to check if the
// access_counter_buffer is allocated.
if (parent_gpu->access_counter_buffer) {
// access_counters.buffer is allocated.
if (parent_gpu->access_counters.buffer) {
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counters.buffer[i];
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
@@ -885,6 +889,19 @@ static uvm_aperture_t parent_gpu_peer_aperture(uvm_parent_gpu_t *local,
else
peer_index = 1;
if (parent_peer_caps->link_type == UVM_GPU_LINK_PCIE_BAR1) {
// UVM_APERTURE_SYS can be used if either the local (accessing) GPU
// _DOES NOT_ use PCIE atomics, or the remote (owning) GPU _DOES_
// accept PCIE atomics. Moreover, the bus topology needs to support
// routing of PCIe atomics between the devices.
//
// If either of the above conditions is not met we need to use
// UVM_APERTURE_SYS_NON_COHERENT to prevent use of PCIe atomics.
// RM provides the consolidated information in P2P properties.
const bool enable_atomics = parent_peer_caps->bar1_p2p_pcie_atomics_enabled[peer_index];
return enable_atomics ? UVM_APERTURE_SYS : UVM_APERTURE_SYS_NON_COHERENT;
}
return UVM_APERTURE_PEER(parent_peer_caps->peer_ids[peer_index]);
}
@@ -1164,6 +1181,22 @@ static void deinit_semaphore_pools(uvm_gpu_t *gpu)
uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
}
static void init_access_counters_serialize_clear_tracker(uvm_parent_gpu_t *parent)
{
NvU32 i;
for (i = 0; i < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; i++)
uvm_tracker_init(&parent->access_counters.serialize_clear_tracker[i]);
}
static void deinit_access_counters_serialize_clear_tracker(uvm_parent_gpu_t *parent)
{
NvU32 i;
for (i = 0; i < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; i++)
uvm_tracker_deinit(&parent->access_counters.serialize_clear_tracker[i]);
}
static NV_STATUS find_unused_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t *out_id)
{
NvU32 i;
@@ -1209,9 +1242,11 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_init(&parent_gpu->access_counters.enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters.clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_mutex_init(&parent_gpu->access_counters.serialize_clear_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters.clear_tracker);
init_access_counters_serialize_clear_tracker(parent_gpu);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1229,7 +1264,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
return NV_OK;
cleanup:
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_tracker_deinit(&parent_gpu->access_counters.clear_tracker);
deinit_access_counters_serialize_clear_tracker(parent_gpu);
uvm_kvfree(parent_gpu);
return status;
@@ -1686,24 +1722,41 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
if (parent_gpu->access_counters_supported && parent_gpu->access_counters.buffer) {
uvm_mutex_lock(&parent_gpu->access_counters.clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters.clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters.clear_tracker_lock);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
if (parent_gpu->access_counters_serialize_clear_ops_by_type) {
uvm_access_counter_clear_op_t op;
uvm_mutex_lock(&parent_gpu->access_counters.serialize_clear_lock);
for (op = 0; op < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; op++) {
status = uvm_tracker_wait(&parent_gpu->access_counters.serialize_clear_tracker[op]);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
uvm_mutex_unlock(&parent_gpu->access_counters.serialize_clear_lock);
}
}
}
void uvm_parent_gpu_sync_trackers(uvm_parent_gpu_t *parent_gpu)
{
sync_parent_gpu_trackers(parent_gpu,
parent_gpu->isr.replayable_faults.handling,
parent_gpu->isr.non_replayable_faults.handling);
}
// Remove all references the given GPU has to other GPUs, since one of those
// other GPUs is getting removed. This involves waiting for any unfinished
// trackers contained by this GPU.
static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
{
sync_parent_gpu_trackers(gpu->parent,
gpu->parent->isr.replayable_faults.handling,
gpu->parent->isr.non_replayable_faults.handling);
uvm_parent_gpu_sync_trackers(gpu->parent);
// Sync all trackers in PMM
uvm_pmm_gpu_sync(&gpu->pmm);
@@ -1713,7 +1766,7 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
}
// Remove all references to the given GPU from its parent, since it is being
// removed. This involves waiting for any unfinished trackers contained
// removed. This involves waiting for any unfinished trackers contained
// by the parent GPU.
static void remove_gpu_from_parent_gpu(uvm_gpu_t *gpu)
{
@@ -1823,7 +1876,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_tracker_deinit(&parent_gpu->access_counters.clear_tracker);
deinit_access_counters_serialize_clear_tracker(parent_gpu);
uvm_kvfree(parent_gpu);
}
@@ -1960,7 +2014,7 @@ static void update_stats_migration_cb(uvm_va_space_t *va_space,
}
else if (is_access_counter) {
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
atomic64_add(pages, &gpu_dst->parent->access_counters.buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
@@ -1973,7 +2027,7 @@ static void update_stats_migration_cb(uvm_va_space_t *va_space,
}
else if (is_access_counter) {
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
atomic64_add(pages, &gpu_src->parent->access_counters.buffer[index].stats.num_pages_out);
}
}
}
@@ -2114,11 +2168,19 @@ bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0,
return false;
}
bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
bool uvm_parent_gpus_are_bar1_peers(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
{
if (parent_gpu0 != parent_gpu1)
return parent_gpu_peer_caps(parent_gpu0, parent_gpu1)->link_type == UVM_GPU_LINK_PCIE_BAR1;
return false;
}
bool uvm_parent_gpus_are_nvlink_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
{
if (parent_gpu0 != parent_gpu1 &&
parent_gpu0->peer_address_info.is_direct_connected &&
parent_gpu1->peer_address_info.is_direct_connected)
parent_gpu0->peer_address_info.is_nvlink_direct_connected &&
parent_gpu1->peer_address_info.is_nvlink_direct_connected)
return true;
return false;
@@ -2419,6 +2481,17 @@ static NV_STATUS parent_peers_init(uvm_parent_gpu_t *parent_gpu0,
parent_peer_caps->optimalNvlinkWriteCEs[0] = p2p_caps_params.optimalNvlinkWriteCEs[0];
parent_peer_caps->optimalNvlinkWriteCEs[1] = p2p_caps_params.optimalNvlinkWriteCEs[1];
// Set IOMMU/DMA mappings for bar1 p2p
parent_peer_caps->bar1_p2p_dma_base_address[0] = p2p_caps_params.bar1DmaAddress[0];
parent_peer_caps->bar1_p2p_dma_base_address[1] = p2p_caps_params.bar1DmaAddress[1];
parent_peer_caps->bar1_p2p_dma_size[0] = p2p_caps_params.bar1DmaSize[0];
parent_peer_caps->bar1_p2p_dma_size[1] = p2p_caps_params.bar1DmaSize[1];
parent_peer_caps->bar1_p2p_pcie_atomics_enabled[0] = p2p_caps_params.bar1PcieAtomics[0];
parent_peer_caps->bar1_p2p_pcie_atomics_enabled[1] = p2p_caps_params.bar1PcieAtomics[1];
if (parent_peer_caps->bar1_p2p_dma_size[0] || parent_peer_caps->bar1_p2p_dma_size[1])
UVM_ASSERT(link_type == UVM_GPU_LINK_PCIE_BAR1);
return NV_OK;
cleanup:
@@ -2563,7 +2636,7 @@ static void peers_release(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
peers_destroy(gpu0, gpu1, peer_caps);
}
static void parent_peers_destroy_nvlink(uvm_parent_gpu_t *parent_gpu)
static void parent_peers_destroy_static_link(uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
@@ -2585,7 +2658,7 @@ static void parent_peers_destroy_nvlink(uvm_parent_gpu_t *parent_gpu)
}
}
static NV_STATUS parent_peers_discover_nvlink(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS parent_peers_discover_static_link(uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
NV_STATUS status;
@@ -2617,12 +2690,12 @@ static NV_STATUS parent_peers_discover_nvlink(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
cleanup:
parent_peers_destroy_nvlink(parent_gpu);
parent_peers_destroy_static_link(parent_gpu);
return status;
}
static void peers_destroy_nvlink(uvm_gpu_t *gpu)
static void peers_destroy_static_link(uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
uvm_parent_gpu_t *parent_gpu;
@@ -2656,7 +2729,7 @@ static void peers_destroy_nvlink(uvm_gpu_t *gpu)
}
}
static NV_STATUS peers_discover_nvlink(uvm_gpu_t *gpu)
static NV_STATUS peers_discover_static_link(uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *parent_gpu = gpu->parent;
uvm_parent_gpu_t *other_parent_gpu;
@@ -2688,11 +2761,26 @@ static NV_STATUS peers_discover_nvlink(uvm_gpu_t *gpu)
return NV_OK;
cleanup:
peers_destroy_nvlink(gpu);
peers_destroy_static_link(gpu);
return status;
}
static NV_STATUS uvm_gpu_init_access_bits(uvm_parent_gpu_t *parent_gpu)
{
return uvm_rm_locked_call(nvUvmInterfaceAccessBitsBufAlloc(parent_gpu->rm_device, &parent_gpu->vab_info));
}
static NV_STATUS uvm_gpu_update_access_bits(uvm_parent_gpu_t *parent_gpu, UVM_ACCESS_BITS_DUMP_MODE mode)
{
return nvUvmInterfaceAccessBitsDump(parent_gpu->rm_device, &parent_gpu->vab_info, mode);
}
static NV_STATUS uvm_gpu_deinit_access_bits(uvm_parent_gpu_t *parent_gpu)
{
return uvm_rm_locked_call(nvUvmInterfaceAccessBitsBufFree(parent_gpu->rm_device, &parent_gpu->vab_info));
}
// Remove a gpu and unregister it from RM
// Note that this is also used in most error paths in add_gpu()
static void remove_gpu(uvm_gpu_t *gpu)
@@ -2700,6 +2788,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
NvU32 sub_processor_index;
uvm_parent_gpu_t *parent_gpu;
bool free_parent;
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
@@ -2716,12 +2805,17 @@ static void remove_gpu(uvm_gpu_t *gpu)
free_parent = (parent_gpu->num_retained_gpus == 0);
if (free_parent && parent_gpu->access_bits_supported) {
status = uvm_gpu_deinit_access_bits(parent_gpu);
UVM_ASSERT(status == NV_OK);
}
// NVLINK peers must be removed and the relevant access counter buffers must
// be flushed before removing this GPU from the global table.
peers_destroy_nvlink(gpu);
peers_destroy_static_link(gpu);
if (free_parent)
parent_peers_destroy_nvlink(parent_gpu);
parent_peers_destroy_static_link(parent_gpu);
// uvm_mem_free and other uvm_mem APIs invoked by the Confidential Compute
// deinitialization must be called before the GPU is removed from the global
@@ -2865,21 +2959,27 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
if (alloc_parent) {
status = parent_peers_discover_nvlink(parent_gpu);
status = parent_peers_discover_static_link(parent_gpu);
if (status != NV_OK)
goto error_retained;
}
status = peers_discover_nvlink(gpu);
status = peers_discover_static_link(gpu);
if (status != NV_OK)
goto error_retained;
*gpu_out = gpu;
if (alloc_parent && parent_gpu->access_bits_supported) {
status = uvm_gpu_init_access_bits(parent_gpu);
if (status != NV_OK)
goto error_retained;
}
return NV_OK;
error_retained:
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK/BAR1 peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
@@ -2933,10 +3033,6 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
if (gpu_info->accessCntrBufferCount > 1)
gpu_info->accessCntrBufferCount = 1;
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
@@ -3082,10 +3178,25 @@ uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 ad
{
uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, owning_gpu);
if (uvm_parent_gpus_are_direct_connected(accessing_gpu->parent, owning_gpu->parent))
if (uvm_parent_gpus_are_nvlink_direct_connected(accessing_gpu->parent, owning_gpu->parent)) {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
address += owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent))
}
else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent)) {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
address += owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
else if (uvm_aperture_is_sys(aperture)) {
// BAR1 P2P can use either coherent or non-coherent sysmem,
// depending on atomic capabilities of the peer devices.
uvm_parent_gpu_peer_t *parent_peer_caps = parent_gpu_peer_caps(accessing_gpu->parent, owning_gpu->parent);
int peer_index = (uvm_id_cmp(accessing_gpu->id, owning_gpu->id) < 0) ? 0 : 1;
UVM_ASSERT(parent_peer_caps->link_type == UVM_GPU_LINK_PCIE_BAR1);
UVM_ASSERT(parent_peer_caps->bar1_p2p_dma_size[peer_index] != 0);
address += parent_peer_caps->bar1_p2p_dma_base_address[peer_index];
}
return uvm_gpu_phys_address(aperture, address);
}
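
The translation above follows one pattern for every link type: the owning GPU's local physical address plus a per-link window base, with per-pair values kept in two-element arrays indexed by GPU id order (index 0 belongs to the GPU with the smaller id). A minimal standalone sketch of that pattern, using illustrative names rather than the driver's:

#include <stdint.h>
#include <stdio.h>

enum link { LINK_NVLINK_DIRECT, LINK_NVSWITCH, LINK_PCIE_BAR1 };

struct pair_caps {
    uint64_t bar1_dma_base[2];   /* BAR1 DMA window base, one per direction */
};

/* By convention, index 0 of a per-pair array belongs to the GPU with the
 * smaller id in the pair. */
static int pair_index(int accessing_id, int owning_id)
{
    return accessing_id < owning_id ? 0 : 1;
}

/* Peer physical address = owning GPU's local address + per-link window base. */
static uint64_t peer_phys_address(enum link link,
                                  uint64_t local_addr,
                                  uint64_t nvlink_window_start,
                                  uint64_t nvswitch_window_start,
                                  const struct pair_caps *caps,
                                  int accessing_id,
                                  int owning_id)
{
    switch (link) {
    case LINK_NVLINK_DIRECT:
        return local_addr + nvlink_window_start;
    case LINK_NVSWITCH:
        return local_addr + nvswitch_window_start;
    case LINK_PCIE_BAR1:
        return local_addr + caps->bar1_dma_base[pair_index(accessing_id, owning_id)];
    }
    return local_addr;
}

int main(void)
{
    struct pair_caps caps = { { 0x100000000ull, 0x200000000ull } };
    /* GPU 0 accessing GPU 1 over BAR1 uses index 0 of the pair arrays. */
    unsigned long long a = peer_phys_address(LINK_PCIE_BAR1, 0x1000, 0, 0, &caps, 0, 1);
    printf("0x%llx\n", a);   /* prints 0x100001000 */
    return 0;
}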
@@ -3134,6 +3245,69 @@ NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
return gpu_peer_caps(gpu0, gpu1)->ref_count;
}
static bool gpu_address_is_coherent_peer(uvm_gpu_t *gpu, uvm_gpu_phys_address_t address)
{
bool is_peer = false;
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (address.aperture != UVM_APERTURE_SYS)
return false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture even without
// GPUs being explicit peers, so each parent GPU is a potential peer.
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
break;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
static bool gpu_phys_address_is_bar1p2p_peer(uvm_gpu_t *gpu, uvm_gpu_phys_address_t address)
{
bool is_peer = false;
uvm_parent_processor_mask_t peer_parent_gpus;
uvm_parent_gpu_t *peer_parent_gpu;
// BAR1 P2P is accessed via sys aperture
if (!uvm_aperture_is_sys(address.aperture))
return false;
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&peer_parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(peer_parent_gpu, &peer_parent_gpus) {
const uvm_parent_gpu_peer_t *peer_caps = parent_gpu_peer_caps(gpu->parent, peer_parent_gpu);
const int peer_index = (uvm_parent_id_cmp(gpu->parent->id, peer_parent_gpu->id) < 0) ? 0 : 1;
UVM_ASSERT(peer_caps->ref_count > 0);
if (peer_caps->link_type != UVM_GPU_LINK_PCIE_BAR1)
continue;
if (address.address >= peer_caps->bar1_p2p_dma_base_address[peer_index] &&
address.address < (peer_caps->bar1_p2p_dma_base_address[peer_index] + peer_caps->bar1_p2p_dma_size[peer_index])) {
is_peer = true;
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return is_peer;
}
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
{
if (address.is_virtual) {
@@ -3145,21 +3319,18 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
}
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (uvm_aperture_is_peer(address.aperture)) {
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
uvm_parent_processor_mask_t peer_parent_gpus;
uvm_parent_gpu_t *peer_parent_gpu;
// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
if (!parent_peer_gpu->egm.enabled)
uvm_parent_gpus_from_processor_mask(&peer_parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(peer_parent_gpu, &peer_parent_gpus) {
if (!peer_parent_gpu->egm.enabled)
continue;
// EGM uses peer IDs but they are different from VIDMEM peer
@@ -3171,32 +3342,18 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, peer_parent_gpu));
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
else if (uvm_aperture_is_sys(address.aperture)) {
// SYS aperture is used for coherent peers or BAR1 P2P.
// SYS_NON_COHERENT aperture is used for BAR1 P2P.
uvm_gpu_phys_address_t phys_addr = uvm_gpu_phys_address(address.aperture, address.address);
return gpu_address_is_coherent_peer(gpu, phys_addr) || gpu_phys_address_is_bar1p2p_peer(gpu, phys_addr);
}
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
@@ -3927,3 +4084,50 @@ NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct fil
return status;
}
NV_STATUS uvm_test_dump_access_bits(UVM_TEST_DUMP_ACCESS_BITS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu = NULL;
NV_STATUS status = NV_OK;
NvU64 granularity_size_kb = 0;
gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !gpu->parent->access_bits_supported) {
status = NV_ERR_INVALID_DEVICE;
goto done;
}
if (!gpu->parent->vab_info.accessBitsBufferHandle) {
status = NV_ERR_INVALID_STATE;
goto done;
}
// See resman/interface/rmapi/finn/ctrl/ctrlc763.finn for 'granularity' enum values
granularity_size_kb = (NvU64)(64) << gpu->parent->vab_info.granularity;
params->granularity_size_kb = granularity_size_kb;
status = uvm_gpu_update_access_bits(gpu->parent, params->mode);
if (status != NV_OK)
goto done;
// If this is a length query, we are done after we set the length
if (params->current_bits_length == 0) {
params->current_bits_length = ARRAY_SIZE(gpu->parent->vab_info.currentBits);
goto done;
}
// Copy the bits to user space
if (copy_to_user(params->current_bits,
gpu->parent->vab_info.currentBits,
sizeof(NvU64) * params->current_bits_length)) {
status = NV_ERR_INVALID_ADDRESS;
goto done;
}
done:
if (gpu)
uvm_gpu_release(gpu);
return status;
}
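
The granularity value scales a 64KB base by a power of two, so the reported size is simply 64 << granularity kilobytes. A quick standalone check of that arithmetic; the printed values follow from the formula above, not from the ctrlc763.finn enum definition itself.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* granularity_size_kb = 64 << granularity, as computed above. */
    for (uint32_t granularity = 0; granularity < 4; granularity++)
        printf("granularity %u -> %llu KB\n",
               granularity,
               (unsigned long long)((uint64_t)64 << granularity));
    /* Prints 64, 128, 256, 512 KB for enum values 0..3. */
    return 0;
}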

View File

@@ -615,6 +615,7 @@ typedef enum
{
UVM_GPU_LINK_INVALID = 0,
UVM_GPU_LINK_PCIE,
UVM_GPU_LINK_PCIE_BAR1,
UVM_GPU_LINK_NVLINK_1,
UVM_GPU_LINK_NVLINK_2,
UVM_GPU_LINK_NVLINK_3,
@@ -996,6 +997,9 @@ struct uvm_parent_gpu_struct
// Total amount of physical memory available on the parent GPU.
NvU64 max_allocatable_address;
// Access bits buffer information
UvmGpuAccessBitsBufferAlloc vab_info;
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
uvm_pmm_gpu_devmem_t *devmem;
#endif
@@ -1069,6 +1073,11 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// True when HW does not allow mixing different clear types concurrently.
bool access_counters_serialize_clear_ops_by_type;
bool access_bits_supported;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@@ -1209,15 +1218,25 @@ struct uvm_parent_gpu_struct
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
struct
{
// This is only valid if supports_access_counters is set to true. This
// array has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *buffer;
uvm_mutex_t enablement_lock;
// Tracker used to aggregate access counters clear operations, needed for
// GPU removal. It is only used when supports_access_counters is set.
uvm_tracker_t access_counters_clear_tracker;
uvm_mutex_t access_counters_clear_tracker_lock;
// Tracker used to aggregate access counters clear operations, needed
// for GPU removal. It is used when supports_access_counters is set.
uvm_tracker_t clear_tracker;
uvm_mutex_t clear_tracker_lock;
// The following access_counters fields are used when
// access_counters_serialize_clear_ops_by_type is set.
// The serialize_clear_tracker is not the common case; its use is
// decoupled from the clear_tracker (above).
uvm_tracker_t serialize_clear_tracker[UVM_ACCESS_COUNTER_CLEAR_OP_COUNT];
uvm_mutex_t serialize_clear_lock;
} access_counters;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1335,6 +1354,14 @@ struct uvm_parent_gpu_struct
// only affects ATS systems.
bool no_ats_range_required : 1;
// Page tree initialization requires the initialization of the entire
// depth-0 allocated area, not only the HW supported entry count range.
// The GMMU page table walk cache operates at its own CL granularity
// (32B). We must have an allocated depth-0 page table of at least this
// size, regardless of how many entries are supported by HW.
// The allocation size is determined by MMU HAL allocation_size().
bool gmmu_pt_depth0_init_required : 1;
// See the comments on uvm_dma_map_invalidation_t
uvm_dma_map_invalidation_t dma_map_invalidation;
@@ -1371,7 +1398,7 @@ struct uvm_parent_gpu_struct
struct
{
// Is the GPU directly connected to peer GPUs.
bool is_direct_connected;
bool is_nvlink_direct_connected;
// 48-bit fabric memory physical offset that peer gpus need in order
// for accesses to be routed to the correct peer.
@@ -1467,6 +1494,22 @@ typedef struct
// iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
NvU8 egm_peer_ids[2];
// IOMMU/DMA mappings of the peer vidmem via bar1. Accesses to this window
// are routed to peer GPU vidmem. The values are provided by RM and RM is
// responsible for creating IOMMU mappings if such mappings are required.
// RM is also responsible for querying PCIe bus topology and determining
// if PCIe atomics are supported between the peers.
// These fields are valid for link type UVM_GPU_LINK_PCIE_BAR1, and the
// address is only valid if size > 0.
// bar1_p2p_dma_base_address[i] provides DMA window used by GPU[i] to
// access bar1 region of GPU[1-i].
NvU64 bar1_p2p_dma_base_address[2];
NvU64 bar1_p2p_dma_size[2];
// True if GPU[i] can use PCIe atomic operations when accessing BAR1
// region of GPU[1-i].
bool bar1_p2p_pcie_atomics_enabled[2];
// The link type between the peer parent GPUs, currently either PCIe or
// NVLINK.
uvm_gpu_link_type_t link_type;
@@ -1580,9 +1623,10 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
// Decrease the refcount on the parent GPU object, and actually delete the
// object if the refcount hits zero.
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *parent_gpu);
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
// Sync the parent GPU's trackers. This involves
// waiting for any unfinished trackers contained by the parent GPU.
void uvm_parent_gpu_sync_trackers(uvm_parent_gpu_t *parent_gpu);
static bool uvm_parent_gpu_supports_full_coherence(uvm_parent_gpu_t *parent_gpu)
{
@@ -1591,6 +1635,7 @@ static bool uvm_parent_gpu_supports_full_coherence(uvm_parent_gpu_t *parent_gpu)
return parent_gpu->is_integrated_gpu;
}
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);
// Either retains an existing PCIe peer entry or creates a new one. In both
@@ -1633,7 +1678,9 @@ uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent
bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_bar1_peers(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_nvlink_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
@@ -1700,7 +1747,7 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// memory, or a BAR1 P2P address
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);

View File

@@ -126,15 +126,15 @@ static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get(uvm_par
{
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
UVM_ASSERT(parent_gpu->access_counter_buffer);
UVM_ASSERT(parent_gpu->access_counters.buffer);
return &parent_gpu->access_counter_buffer[notif_buf_index];
return &parent_gpu->access_counters.buffer[notif_buf_index];
}
static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get_or_null(uvm_parent_gpu_t *parent_gpu,
NvU32 notif_buf_index)
{
if (parent_gpu->access_counter_buffer)
if (parent_gpu->access_counters.buffer)
return parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
return NULL;
}
@@ -221,9 +221,70 @@ static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
uvm_mutex_lock(&parent_gpu->access_counters.clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters.clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters.clear_tracker_lock);
return status;
}
static NV_STATUS access_counters_push_begin(uvm_gpu_t *gpu, uvm_push_t *push, uvm_access_counter_clear_op_t clear_op)
{
NV_STATUS status;
uvm_tracker_t *pending_clear_op_tracker = NULL;
static const char *push_info_msg[2] = { "Clear access counter: batch",
"Clear access counter: all" };
if (gpu->parent->access_counters_serialize_clear_ops_by_type) {
// The following logic only works when we have 2 clear_op options.
// Otherwise, we must select the pending clear op tracker.
BUILD_BUG_ON(UVM_ACCESS_COUNTER_CLEAR_OP_COUNT != 2);
pending_clear_op_tracker = &gpu->parent->access_counters.serialize_clear_tracker[!clear_op];
// On push_begin (below) success, this lock is released in
// access_counters_push_end();
uvm_mutex_lock(&gpu->parent->access_counters.serialize_clear_lock);
}
// uvm_push_begin_acquire() is converted to uvm_push_begin() when
// pending_clear_op_tracker is NULL. Otherwise, it adds a semaphore acquire
// at the push prologue. The semaphore acquire waits until all pending clear
// ops of the other type are finished before this clear op is processed. The
// wait is a no-op if there are no pending clear ops in flight.
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
pending_clear_op_tracker,
push,
push_info_msg[clear_op]);
if (status != NV_OK && gpu->parent->access_counters_serialize_clear_ops_by_type)
uvm_mutex_unlock(&gpu->parent->access_counters.serialize_clear_lock);
return status;
}
static NV_STATUS access_counters_push_end(uvm_push_t *push, uvm_access_counter_clear_op_t clear_op)
{
NV_STATUS status = NV_OK;
uvm_push_end(push);
if (push->gpu->parent->access_counters_serialize_clear_ops_by_type) {
uvm_tracker_t *tracker = &push->gpu->parent->access_counters.serialize_clear_tracker[clear_op];
uvm_tracker_remove_completed(tracker);
status = uvm_tracker_add_push_safe(tracker, push);
// This lock is acquired in access_counters_push_begin();
uvm_mutex_unlock(&push->gpu->parent->access_counters.serialize_clear_lock);
if (status != NV_OK)
return status;
}
uvm_mutex_lock(&push->gpu->parent->access_counters.clear_tracker_lock);
uvm_tracker_remove_completed(&push->gpu->parent->access_counters.clear_tracker);
status = uvm_tracker_add_push_safe(&push->gpu->parent->access_counters.clear_tracker, push);
uvm_mutex_unlock(&push->gpu->parent->access_counters.clear_tracker_lock);
return status;
}
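
The begin/end pair above serializes the two clear-op flavours against each other: acquire the other flavour's tracker before submitting, and record the submission in this flavour's tracker afterwards. The sketch below models only that ordering rule with plain counters and illustrative names; it is not the UVM push/tracker API.

#include <stdio.h>

/* Illustrative model (not the driver's types): two clear-op flavours are
 * serialized against each other by waiting on the *other* flavour's tracker
 * before a push and recording the push in this flavour's tracker after it. */
enum clear_op { CLEAR_OP_TARGETED = 0, CLEAR_OP_ALL = 1, CLEAR_OP_COUNT = 2 };

static int pending[CLEAR_OP_COUNT];   /* pushes not yet known to be complete */

static void clear_push_begin(enum clear_op op)
{
    /* Stand-in for the semaphore acquire on the other type's tracker:
     * nothing of the other type may still be in flight when we start. */
    pending[!op] = 0;
    printf("begin %s clear (waited for %s clears)\n",
           op == CLEAR_OP_ALL ? "all" : "targeted",
           op == CLEAR_OP_ALL ? "targeted" : "all");
}

static void clear_push_end(enum clear_op op)
{
    pending[op]++;   /* future clears of the other type must wait for this */
}

int main(void)
{
    clear_push_begin(CLEAR_OP_TARGETED);
    clear_push_end(CLEAR_OP_TARGETED);
    clear_push_begin(CLEAR_OP_ALL);     /* waits for the targeted clear */
    clear_push_end(CLEAR_OP_ALL);
    return 0;
}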
@@ -235,28 +296,19 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf
NV_STATUS status;
uvm_push_t push;
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Clear access counter: all");
status = access_counters_push_begin(gpu, &push, UVM_ACCESS_COUNTER_CLEAR_OP_ALL);
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
gpu->parent->host_hal->access_counter_clear_all(&push);
uvm_push_end(&push);
uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);
return status;
return access_counters_push_end(&push, UVM_ACCESS_COUNTER_CLEAR_OP_ALL);
}
// Clear the access counter notifications and add it to the per-GPU clear
@@ -277,26 +329,20 @@ static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
status = access_counters_push_begin(gpu, &push, UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
uvm_push_end(&push);
uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);
return status;
return access_counters_push_end(&push, UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
}
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
@@ -603,7 +649,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
UVM_ASSERT(gpu->parent->access_counters_supported);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_OK;
@@ -631,7 +677,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
return status;
@@ -646,7 +692,7 @@ cleanup:
uvm_access_counters_isr_unlock(access_counters);
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
return status;
}
@@ -703,7 +749,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
UVM_ASSERT(gpu->parent->access_counters_supported);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
gpu->parent->id)) {
@@ -719,7 +765,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
}
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
}
static void write_get(uvm_access_counter_buffer_t *access_counters, NvU32 get)
@@ -2054,7 +2100,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_release_gpu;
}
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
@@ -2072,7 +2118,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
exit_ac_lock:
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
exit_release_gpu:
uvm_gpu_release(gpu);
@@ -2101,15 +2147,15 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
// Access counters not enabled. Nothing to reset
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
goto exit_release_gpu;
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
// Clear operations affect all notification buffers, we use the
// notif_buf_index = 0;
@@ -2233,10 +2279,10 @@ NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}
buffer_size = gpu->parent->access_counter_buffer[0].rm_info.bufferSize;
buffer_size = gpu->parent->access_counters.buffer[0].rm_info.bufferSize;
for (index = 1; index < gpu->parent->rm_info.accessCntrBufferCount; index++)
UVM_ASSERT(gpu->parent->access_counter_buffer[index].rm_info.bufferSize == buffer_size);
UVM_ASSERT(gpu->parent->access_counters.buffer[index].rm_info.bufferSize == buffer_size);
params->num_notification_buffers = gpu->parent->rm_info.accessCntrBufferCount;
params->num_notification_entries = buffer_size / gpu->parent->access_counter_buffer_hal->entry_size(gpu->parent);


@@ -183,7 +183,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, N
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
uvm_access_counters_intr_disable(&parent_gpu->access_counters.buffer[notif_buf_index]);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
@@ -327,12 +327,12 @@ static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
parent_gpu->access_counters.buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
&parent_gpu->access_counters.buffer[notif_buf_index]);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
@@ -431,9 +431,9 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
parent_gpu->access_counters.buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counters.buffer) *
index_count);
if (!parent_gpu->access_counters.buffer)
return NV_ERR_NO_MEMORY;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
@@ -535,8 +535,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
if (parent_gpu->access_counters.buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counters.buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
@@ -546,7 +546,7 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}
uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
uvm_kvfree(parent_gpu->access_counters.buffer);
}
if (parent_gpu->non_replayable_faults_supported) {


@@ -154,6 +154,7 @@ static uvm_hal_class_ops_t ce_table[] =
.id = HOPPER_DMA_COPY_A,
.parent_id = AMPERE_DMA_COPY_B,
.u.ce_ops = {
.phys_mode = uvm_hal_hopper_ce_phys_mode,
.semaphore_release = uvm_hal_hopper_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_hopper_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
@@ -172,7 +173,9 @@ static uvm_hal_class_ops_t ce_table[] =
{
.id = BLACKWELL_DMA_COPY_A,
.parent_id = HOPPER_DMA_COPY_A,
.u.ce_ops = {},
.u.ce_ops = {
.memcopy_is_valid = uvm_hal_blackwell_ce_memcopy_is_valid,
},
},
{
.id = BLACKWELL_DMA_COPY_B,
@@ -1166,8 +1169,6 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n",
uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s",
uvm_parent_gpu_name(gpu->parent));
}
}


@@ -348,6 +348,7 @@ void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_hopper_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
@@ -368,6 +369,7 @@ typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t
bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_blackwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation


@@ -50,6 +50,10 @@ typedef enum
// It is directly encoded as SYS_COH in PTEs and CE/esched methods.
UVM_APERTURE_SYS,
// SYS_NON_COHERENT aperture is used when we must prevent PCIe atomics from
// being issued to BAR1 P2P addresses. It's only used to control the use of
// atomics with no other impact on the coherence model.
//
// On platforms that support the GPU coherently caching system memory,
// SYS_NON_COHERENT prevents other clients from snooping the GPU L2 cache.
// This allows noncoherent caching of system memory by GPUs on these
@@ -93,6 +97,11 @@ typedef enum
const char *uvm_aperture_string(uvm_aperture_t aperture);
static bool uvm_aperture_is_sys(uvm_aperture_t aperture)
{
return (aperture == UVM_APERTURE_SYS) || (aperture == UVM_APERTURE_SYS_NON_COHERENT);
}
static bool uvm_aperture_is_peer(uvm_aperture_t aperture)
{
return (aperture >= UVM_APERTURE_PEER_0) && (aperture < UVM_APERTURE_PEER_MAX);
@@ -514,9 +523,9 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
typedef enum
{
UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0,
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED,
UVM_ACCESS_COUNTER_CLEAR_OP_ALL
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED = 0,
UVM_ACCESS_COUNTER_CLEAR_OP_ALL,
UVM_ACCESS_COUNTER_CLEAR_OP_COUNT,
} uvm_access_counter_clear_op_t;
struct uvm_access_counter_buffer_entry_struct
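A side note on the renumbered enum above: with UVM_ACCESS_COUNTER_CLEAR_OP_NONE removed, UVM_ACCESS_COUNTER_CLEAR_OP_COUNT appears to take over the "not a real clear op" role (the Maxwell stub elsewhere in this compare now returns COUNT where it used to return NONE). A minimal sketch of that pattern follows; the per-op tracker array is an assumption for illustration, not something shown in this diff:

/* Sketch only: COUNT bounds the enum and can also size per-op state, e.g.
 * one tracker per clear type on GPUs that must serialize clear ops by type
 * (see access_counters_serialize_clear_ops_by_type in the arch hunks below).
 * This array is hypothetical, not taken from the source. */
static uvm_tracker_t clear_trackers[UVM_ACCESS_COUNTER_CLEAR_OP_COUNT];

static bool clear_op_is_valid(uvm_access_counter_clear_op_t op)
{
    return op < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT;
}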


@@ -1758,7 +1758,6 @@ static void gpu_chunk_free(uvm_va_block_t *va_block,
return;
UVM_ASSERT(gpu_chunk->va_block == va_block);
UVM_ASSERT(gpu_chunk->va_block_page_index == page_index);
uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
gpu_state->chunks[page_index] = NULL;
@@ -1867,7 +1866,6 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
if (gpu_state->chunks[page_index] == gpu_chunk) {
UVM_ASSERT(gpu_chunk->va_block == va_block);
UVM_ASSERT(gpu_chunk->va_block_page_index == page_index);
return NV_OK;
}
@@ -1887,11 +1885,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
uvm_processor_mask_set(&va_block->resident, gpu->id);
uvm_page_mask_set(&gpu_state->resident, page_index);
// It is safe to modify the page index field without holding any PMM locks
// because the chunk is allocated, which means that none of the other
// fields in the bitmap can change.
gpu_chunk->va_block = va_block;
gpu_chunk->va_block_page_index = page_index;
gpu_state->chunks[page_index] = gpu_chunk;


@@ -95,6 +95,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -113,6 +117,8 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = true;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
// Hopper doesn't prefetch translations for physical requests, so the only
// concern would be if we enabled physical ATS with 4K pages. In that case
// we could see a mix of cached valid and invalid translations in the same


@@ -34,22 +34,68 @@ static NvU32 ce_aperture(uvm_aperture_t aperture)
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));
if (aperture == UVM_APERTURE_SYS) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_SYS_NON_COHERENT) {
// SYS_NON_COHERENT aperture is currently only used for certain
// BAR1 P2P addresses. The use of SYS vs. SYS_NON_COHERENT aperture
// controls the ability to use PCIe atomics to access the BAR1 region.
// The only way to potentially use atomic operations in UVM is a
// semaphore reduction operation.
// Since UVM doesn't use semaphore operations on peer (or physical)
// addresses, it'd be safe to encode SYS_NON_COHERENT aperture as
// COHERENT_SYSMEM for CE methods.
// NONCOHERENT_SYSMEM encoding is used for correctness and potential
// future use of SYS_NON_COHERENT aperture outside of atomics control
// in BAR1 P2P.
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_VID) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
}
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_hopper_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;
if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(C8B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(C8B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(C8B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
return launch_dma_src_dst_type;
}
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
NV_PUSH_2U(C8B5, OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2024 NVIDIA Corporation
Copyright (c) 2020-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -121,7 +121,10 @@ static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
return 256;
// depth 0 requires only a 16-byte allocation, but it must be 4k aligned.
// depth 0 requires only a 16-byte allocation to house the 2 entries at the
// 0-depth level. Due to HW constraints, the effective minimum allocation
// size is 32 bytes. It must be 4k aligned.
// See comments in uvm_gpu.h:gmmu_pt_depth0_init_required for details.
return 4096;
}
@@ -250,7 +253,7 @@ static NvU64 unmapped_pte_hopper(NvU64 page_size)
HWCONST64(_MMU_VER3, PTE, PCF, NO_VALID_4KB_PAGE);
}
static NvU64 poisoned_pte_hopper(void)
static NvU64 poisoned_pte_hopper(uvm_page_tree_t *tree)
{
// An invalid PTE won't be fatal from faultable units like SM, which is the
// most likely source of bad PTE accesses.
@@ -264,7 +267,7 @@ static NvU64 poisoned_pte_hopper(void)
// be aligned to page_size.
NvU64 phys_addr = 0x2bad000000ULL;
NvU64 pte_bits = make_pte_hopper(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
}
@@ -380,10 +383,10 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_
pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
@@ -415,10 +418,10 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_pag
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
@@ -453,10 +456,10 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_p
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);


@@ -147,40 +147,6 @@ typedef struct
NV_STATUS rmStatus; // OUT
} UVM_SET_STREAM_STOPPED_PARAMS;
//
// UvmCallTestFunction
//
#define UVM_RUN_TEST UVM_IOCTL_BASE(9)
typedef struct
{
NvProcessorUuid gpuUuid; // IN
NvU32 test; // IN
struct
{
NvProcessorUuid peerGpuUuid; // IN
NvU32 peerId; // IN
} multiGpu;
NV_STATUS rmStatus; // OUT
} UVM_RUN_TEST_PARAMS;
//
// This is a magic offset for mmap. Any mapping of an offset above this
// threshold will be treated as a counters mapping, not as an allocation
// mapping. Since allocation offsets must be identical to the virtual address
// of the mapping, this threshold has to be an offset that cannot be
// a valid virtual address.
//
#if defined(__linux__)
#if defined(NV_64_BITS)
#define UVM_EVENTS_OFFSET_BASE (1UL << 63)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 62)
#else
#define UVM_EVENTS_OFFSET_BASE (1UL << 31)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 30)
#endif
#endif // defined(__linux___)
//
// UvmAddSession
//
@@ -189,27 +155,9 @@ typedef struct
typedef struct
{
NvU32 pidTarget; // IN
#ifdef __linux__
NvP64 countersBaseAddress NV_ALIGN_BYTES(8); // IN
NvS32 sessionIndex; // OUT (session index that got added)
#endif
NV_STATUS rmStatus; // OUT
} UVM_ADD_SESSION_PARAMS;
//
// UvmRemoveSession
//
#define UVM_REMOVE_SESSION UVM_IOCTL_BASE(11)
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN (session index to be removed)
#endif
NV_STATUS rmStatus; // OUT
} UVM_REMOVE_SESSION_PARAMS;
#define UVM_MAX_COUNTERS_PER_IOCTL_CALL 32
//
@@ -219,9 +167,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
UvmCounterConfig config[UVM_MAX_COUNTERS_PER_IOCTL_CALL]; // IN
NvU32 count; // IN
NV_STATUS rmStatus; // OUT
@@ -234,9 +179,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 scope; // IN (UvmCounterScope)
NvU32 counterName; // IN (UvmCounterName)
NvProcessorUuid gpuUuid; // IN
@@ -251,15 +193,10 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // OUT
NvU64 queueSize NV_ALIGN_BYTES(8); // IN
NvU64 notificationCount NV_ALIGN_BYTES(8); // IN
#if defined(WIN32) || defined(WIN64)
NvU64 notificationHandle NV_ALIGN_BYTES(8); // IN
#endif
NvU32 timeStampType; // IN (UvmEventTimeStampType)
NV_STATUS rmStatus; // OUT
} UVM_CREATE_EVENT_QUEUE_PARAMS;
@@ -271,9 +208,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // IN
NV_STATUS rmStatus; // OUT
} UVM_REMOVE_EVENT_QUEUE_PARAMS;
@@ -285,9 +219,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // IN
NvP64 userRODataAddr NV_ALIGN_BYTES(8); // IN
NvP64 userRWDataAddr NV_ALIGN_BYTES(8); // IN
@@ -304,39 +235,12 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // IN
NvS32 eventType; // IN
NvU32 enable; // IN
NV_STATUS rmStatus; // OUT
} UVM_EVENT_CTRL_PARAMS;
//
// UvmRegisterMpsServer
//
#define UVM_REGISTER_MPS_SERVER UVM_IOCTL_BASE(18)
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
NvU32 numGpus; // IN
NvU64 serverId NV_ALIGN_BYTES(8); // OUT
NV_STATUS rmStatus; // OUT
} UVM_REGISTER_MPS_SERVER_PARAMS;
//
// UvmRegisterMpsClient
//
#define UVM_REGISTER_MPS_CLIENT UVM_IOCTL_BASE(19)
typedef struct
{
NvU64 serverId NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_REGISTER_MPS_CLIENT_PARAMS;
//
// UvmEventGetGpuUuidTable
//
@@ -442,7 +346,6 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuid; // IN
NvHandle hClient; // IN
NvHandle hChannel; // IN
NV_STATUS rmStatus; // OUT
@@ -510,7 +413,6 @@ typedef struct
typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_FREE_PARAMS;
@@ -1128,8 +1030,8 @@ typedef struct
typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length; // IN
NvU64 flags; // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 flags NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_DISCARD_PARAMS;


@@ -40,10 +40,6 @@
#include "nv-linux.h"
#include <linux/log2.h>
#if defined(NV_PRIO_TREE_PRESENT)
#include <linux/prio_tree.h>
#endif
#include <linux/jhash.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
@@ -117,21 +113,14 @@
#define NVIDIA_UVM_PRETTY_PRINTING_PREFIX "nvidia-uvm: "
#define pr_fmt(fmt) NVIDIA_UVM_PRETTY_PRINTING_PREFIX fmt
// Dummy printing function that maintains syntax and format specifier checking
// but doesn't print anything and doesn't evaluate the print parameters. This is
// roughly equivalent to the kernel's no_printk function. We use this instead
// because:
// 1) no_printk was not available until 2.6.36
// 2) Until 4.5 no_printk was implemented as a static function, meaning its
// parameters were always evaluated
#define UVM_NO_PRINT(fmt, ...) \
do { \
if (0) \
printk(fmt, ##__VA_ARGS__); \
} while (0)
#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)
#if defined(NV_GET_DEV_PAGEMAP_HAS_PGMAP_ARG)
#define NV_GET_DEV_PAGEMAP(pfn) get_dev_pagemap(pfn, NULL)
#else
#define NV_GET_DEV_PAGEMAP get_dev_pagemap
#endif
/* Return a nanosecond-precise value */
static inline NvU64 NV_GETTIME(void)
{
@@ -141,25 +130,6 @@ static inline NvU64 NV_GETTIME(void)
return (NvU64) timespec64_to_ns(&tm);
}
// atomic_long_read_acquire and atomic_long_set_release were added in commit
// b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb ("locking/atomics: Switch to
// generated atomic-long") in v5.1 (2019-05-05).
// TODO: Bug 3849079: We always use these definitions on newer kernels.
#define atomic_long_read_acquire uvm_atomic_long_read_acquire
static inline long uvm_atomic_long_read_acquire(atomic_long_t *p)
{
long val = atomic_long_read(p);
smp_mb();
return val;
}
#define atomic_long_set_release uvm_atomic_long_set_release
static inline void uvm_atomic_long_set_release(atomic_long_t *p, long v)
{
smp_mb();
atomic_long_set(p, v);
}
typedef struct
{
struct mem_cgroup *new_memcg;


@@ -407,6 +407,11 @@
// Operations allowed while holding this lock
// - Pushing work to SEC2 channels
//
// - Access counters clear operations
// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
// It protects the parent_gpu's access counters clear tracker.
//
// - Concurrent push semaphore
// Order: UVM_LOCK_ORDER_PUSH
// Semaphore (uvm_semaphore_t)
@@ -432,11 +437,6 @@
// Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
// Exclusive bitlock (mutex) per each root chunk internal to PMM.
//
// - Access counters clear operations
// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
// It protects the parent_gpu's access counters clear tracker.
//
// - Channel lock
// Order: UVM_LOCK_ORDER_CHANNEL
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
@@ -524,11 +524,11 @@ typedef enum
UVM_LOCK_ORDER_KEY_ROTATION_WLC,
UVM_LOCK_ORDER_CSL_WLC_PUSH,
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
UVM_LOCK_ORDER_PUSH,
UVM_LOCK_ORDER_PMM,
UVM_LOCK_ORDER_PMM_PMA,
UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
UVM_LOCK_ORDER_CHANNEL,
UVM_LOCK_ORDER_WLC_CHANNEL,
UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,


@@ -61,6 +61,10 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = false;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
@@ -75,5 +79,7 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}


@@ -268,7 +268,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
// Check if the copy is over NVLINK and simulate dropped traffic if there's
// an NVLINK error.
// Src address cannot be peer as that wouldn't pass the valid check above.
if (uvm_gpu_address_is_peer(gpu, dst) && uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK)
if (uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK && uvm_gpu_address_is_peer(gpu, dst))
size = 0;
gpu->parent->ce_hal->memcopy_patch_src(push, &src);


@@ -352,7 +352,7 @@ uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *pare
NvU32 num_entries)
{
UVM_ASSERT_MSG(false, "host access_counter_query_clear_op called on Maxwell GPU\n");
return UVM_ACCESS_COUNTER_CLEAR_OP_NONE;
return UVM_ACCESS_COUNTER_CLEAR_OP_COUNT;
}
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu)


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -253,7 +253,7 @@ static NvU64 make_pte_maxwell(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
else
pte_bits |= HWCONST64(_MMU, PTE, VOL, TRUE);
// aperture 34:32
// aperture 34:33
if (aperture == UVM_APERTURE_SYS)
aperture_bits = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
else if (aperture == UVM_APERTURE_VID)
@@ -296,7 +296,7 @@ static NvU64 make_sked_reflected_pte_maxwell(void)
return pte_bits;
}
static NvU64 poisoned_pte_maxwell(void)
static NvU64 poisoned_pte_maxwell(uvm_page_tree_t *tree)
{
// An invalid PTE is also fatal on Maxwell, but a PRIV violation will
// immediately identify bad PTE usage.
@@ -309,7 +309,7 @@ static NvU64 poisoned_pte_maxwell(void)
// This address has to fit within 37 bits (max address width of vidmem) and
// be aligned to page_size.
NvU64 phys_addr = 0x1bad000000ULL;
NvU64 pte_bits = make_pte_maxwell(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU, PTE, PRIVILEGE, TRUE);
}
@@ -317,8 +317,13 @@ static NvU64 poisoned_pte_maxwell(void)
// Sparse mappings are not supported.
static NvU64 make_sparse_pte_maxwell_unsupported(void)
{
NvU64 pte_bits;
UVM_ASSERT_MSG(0, "Sparse mappings unsupported on pre-Pascal GPUs\n");
return poisoned_pte_maxwell();
pte_bits = HWCONST64(_MMU, PTE, VALID, FALSE);
return pte_bits;
}
static uvm_mmu_mode_hal_t maxwell_64_mmu_mode_hal =


@@ -588,7 +588,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
service_context->block_context->make_resident.dest_nid))
skipped_migrate = true;
}
else if (uvm_processor_mask_test(&managed_range->va_range.uvm_lite_gpus, dest_id) &&
else if (uvm_processor_mask_test(&managed_range->uvm_lite_gpus, dest_id) &&
!uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
// Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
// unless it's the preferred location


@@ -43,8 +43,6 @@
#ifdef UVM_MIGRATE_VMA_SUPPORTED
static struct kmem_cache *g_uvm_migrate_vma_state_cache __read_mostly;
static const gfp_t g_migrate_vma_gfp_flags = NV_UVM_GFP_FLAGS | GFP_HIGHUSER_MOVABLE | __GFP_THISNODE;
static uvm_sgt_t *uvm_select_sgt(uvm_processor_id_t src_id, int src_nid, migrate_vma_state_t *state)
@@ -1497,7 +1495,7 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu);
}
state = nv_kmem_cache_zalloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);
state = uvm_kvmalloc_zero(sizeof(migrate_vma_state_t));
if (!state)
return NV_ERR_NO_MEMORY;
@@ -1519,22 +1517,17 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
out:
uvm_kvfree(state->dma.sgt_cpu);
uvm_kvfree(state->cpu_page_mask);
kmem_cache_free(g_uvm_migrate_vma_state_cache, state);
uvm_kvfree(state);
return status;
}
NV_STATUS uvm_migrate_pageable_init(void)
{
g_uvm_migrate_vma_state_cache = NV_KMEM_CACHE_CREATE("migrate_vma_state_t", migrate_vma_state_t);
if (!g_uvm_migrate_vma_state_cache)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
void uvm_migrate_pageable_exit(void)
{
kmem_cache_destroy_safe(&g_uvm_migrate_vma_state_cache);
}
#endif


@@ -152,9 +152,10 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
}
// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_SYS_NON_COHERENT biggest page size on BAR1 mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
@@ -306,7 +307,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
}
else {
NvU64 page_offset = offset_in_page(phys_alloc->addr.address);
return kmap(uvm_mmu_page_table_page(gpu, phys_alloc)) + page_offset;
return (char *)kmap(uvm_mmu_page_table_page(gpu, phys_alloc)) + page_offset;
}
}
@@ -392,7 +393,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
NvU64 pde_data[2], entry_size;
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
NvU32 max_inline_entries;
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
uvm_membar_t push_membar;
uvm_gpu_address_t inline_data_addr;
uvm_push_inline_data_t inline_data;
NvU32 entry_count, i, j;
@@ -403,12 +404,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
UVM_ASSERT(sizeof(pde_data) >= entry_size);
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
push_membar = uvm_push_get_and_reset_membar_flag(push);
pde_entry_addr.address += start_index * entry_size;
for (i = 0; i < pde_count;) {
@@ -420,11 +416,11 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
entry_count = min(pde_count - i, max_inline_entries);
// No membar is needed until the last memory operation. Otherwise,
// use caller's membar flag.
// use caller's membar.
if ((i + entry_count) < pde_count)
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
uvm_push_set_flag(push, push_membar_flag);
uvm_push_set_membar(push, UVM_MEMBAR_NONE);
else
uvm_push_set_membar(push, push_membar);
uvm_push_inline_data_begin(push, &inline_data);
for (j = 0; j < entry_count; j++) {
@@ -458,6 +454,16 @@ static void pde_fill(uvm_page_tree_t *tree,
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
}
static void phys_mem_init_memset(uvm_gpu_t *gpu, uvm_push_t *push, uvm_page_directory_t *dir, NvU64 value)
{
NvU64 size = dir->phys_alloc.size;
if (push)
gpu->parent->ce_hal->memset_8(push, uvm_mmu_gpu_address(push->gpu, dir->phys_alloc.addr), value, size);
else
uvm_mmu_page_table_cpu_memset_8(gpu, &dir->phys_alloc, 0, value, size / sizeof(value));
}
static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
{
NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
@@ -490,24 +496,38 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_direc
}
// Initialize the memory to a reasonable value.
if (push) {
tree->gpu->parent->ce_hal->memset_8(push,
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
*clear_bits,
dir->phys_alloc.size);
}
else {
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
&dir->phys_alloc,
0,
*clear_bits,
dir->phys_alloc.size / sizeof(*clear_bits));
}
phys_mem_init_memset(tree->gpu, push, dir, *clear_bits);
}
else {
// Initialize the entire directory allocated page table area due to Bug
// 5282495. See comment in ats.gmmu_pt_depth0_init_required declaration.
if (dir->depth == 0 && tree->gpu->parent->ats.gmmu_pt_depth0_init_required) {
uvm_membar_t push_membar;
// Retrieve and store the caller's membar, since
// phys_mem_init_memset() will consume it.
if (push) {
push_membar = uvm_push_get_and_reset_membar_flag(push);
// No membar is required, pde_fill() will push the caller's
// membar.
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
}
// phys_mem_init_memset() consumes and resets the CE's push pipeline
// flag, which is required to avoid WaW issues since pde_fill()
// will write to the same range and its first operation is not
// pipelined.
phys_mem_init_memset(tree->gpu, push, dir, 0);
if (push) {
// Restore the caller's membar for pde_fill().
uvm_push_set_membar(push, push_membar);
}
}
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
}
}
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
@@ -1671,7 +1691,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
tree->gpu->parent->ce_hal->memset_8(&push,
uvm_mmu_gpu_address(tree->gpu, pte_dir->phys_alloc.addr),
tree->hal->poisoned_pte(),
tree->hal->poisoned_pte(tree),
pte_dir->phys_alloc.size);
// If both the new PTEs and the parent PDE are in vidmem, then a GPU-
@@ -2388,23 +2408,21 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
uvm_aperture_t aperture;
NvU64 phys_offset;
uvm_gpu_identity_mapping_t *peer_mapping;
uvm_gpu_phys_address_t phys_address;
UVM_ASSERT(gpu->parent->peer_copy_mode < UVM_GPU_PEER_COPY_MODE_COUNT);
if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
return NV_OK;
aperture = uvm_gpu_peer_aperture(gpu, peer);
// Use transformation of address 0 to get offset and aperture for all
// other addresses.
phys_address = uvm_gpu_peer_phys_address(peer, 0, gpu);
aperture = phys_address.aperture;
phys_offset = phys_address.address;
page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
phys_offset = 0ULL;
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, peer->parent)) {
// Add the 47-bit physical address routing bits for this peer to the
// generated PTEs
phys_offset = peer->parent->nvswitch_info.fabric_memory_window_start;
}
UVM_ASSERT(page_size);
UVM_ASSERT(size);
@@ -2983,16 +3001,12 @@ NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture)
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"L2 cache invalidate");
if (status != NV_OK) {
UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status));
if (status != NV_OK)
return status;
}
gpu->parent->host_hal->l2_invalidate(&push, aperture);
status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
UVM_ERR_PRINT("ERROR: L2 cache invalidation: Failed to complete push, status: %s\n", nvstatusToString(status));
return status;
}


@@ -217,7 +217,7 @@ struct uvm_mmu_mode_hal_struct
// Bit pattern used for debug purposes to clobber PTEs which ought to be
// unused. In practice this will generate a PRIV violation or a physical
// memory out-of-range error so we can immediately identify bad PTE usage.
NvU64 (*poisoned_pte)(void);
NvU64 (*poisoned_pte)(uvm_page_tree_t *tree);
// Write a PDE bit-pattern to entry based on the data in allocs (which may
// point to two items for dual PDEs).
@@ -228,7 +228,7 @@ struct uvm_mmu_mode_hal_struct
// invalid/clean PDE entries.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, uvm_page_directory_t *dir, NvU32 child_index);
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// (in the case of Pascal dual PDEs)
NvLength (*entry_size)(NvU32 depth);


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,6 +44,10 @@
#include "clc36f.h"
#include "clc3b5.h"
// TURING_*
#include "clc46f.h"
#include "clc5b5.h"
// AMPERE_*
#include "clc56f.h"
#include "clc6b5.h"
@@ -1783,11 +1787,16 @@ static NV_STATUS entry_test_page_size_volta(uvm_gpu_t *gpu, size_t page_size)
return entry_test_page_size_pascal(gpu, page_size);
}
static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
static NV_STATUS entry_test_page_size_turing(uvm_gpu_t *gpu, size_t page_size)
{
return entry_test_page_size_volta(gpu, page_size);
}
static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
{
return entry_test_page_size_turing(gpu, page_size);
}
static NV_STATUS entry_test_page_size_hopper(uvm_gpu_t *gpu, size_t page_size)
{
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
@@ -1810,11 +1819,13 @@ typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size)
static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
NvU64 pde_bits;
uvm_mmu_page_table_alloc_t *phys_allocs[2];
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_page_tree_t tree;
uvm_mmu_mode_hal_t *hal;
uvm_page_directory_t dir;
NvU64 big_page_size, page_size;
@@ -1895,19 +1906,30 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
0x1BBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x80000002FBBBBBB5LL);
TEST_NV_CHECK_RET(test_page_tree_init(gpu, big_page_size, &tree));
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x800000011bad0007ull, cleanup_tree);
uvm_page_tree_deinit(&tree);
}
return NV_OK;
cleanup_tree:
uvm_page_tree_deinit(&tree);
return status;
}
static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NV_STATUS status = NV_OK;
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
size_t i, num_page_sizes;
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_page_tree_t tree;
uvm_page_directory_t dir;
// big versions have [11:8] set as well to test the page table merging
@@ -1993,7 +2015,16 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
TEST_NV_CHECK_RET(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree));
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x1bad000e9ull, cleanup_tree);
uvm_page_tree_deinit(&tree);
return NV_OK;
cleanup_tree:
uvm_page_tree_deinit(&tree);
return status;
}
static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
@@ -2070,6 +2101,30 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
return NV_OK;
}
static NV_STATUS entry_test_turing(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NV_STATUS status = NV_OK;
uvm_page_tree_t tree;
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU32 i, num_page_sizes;
num_page_sizes = get_page_sizes(gpu, page_sizes);
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
TEST_NV_CHECK_RET(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree));
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x6000001bad000e9ull, cleanup_tree);
uvm_page_tree_deinit(&tree);
return NV_OK;
cleanup_tree:
uvm_page_tree_deinit(&tree);
return status;
}
static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
@@ -2093,6 +2148,7 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
uvm_page_tree_t tree;
// Big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
@@ -2101,6 +2157,7 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
memset(dirs, 0, sizeof(dirs));
// Fake directory tree.
for (i = 0; i < ARRAY_SIZE(dirs); i++) {
dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
@@ -2272,10 +2329,19 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
// sked reflected
TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF0F, cleanup);
// poisoned - use a fake tree as it is required by poisoned_pte's MMU HAL.
// The tests above manually set the MMU HAL but used functions that don't
// have a uvm_page_tree_t argument.
TEST_NV_CHECK_GOTO(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree), cleanup);
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x2bad0006f9ull, cleanup_tree);
num_page_sizes = get_page_sizes(gpu, page_sizes);
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup_tree);
cleanup_tree:
uvm_page_tree_deinit(&tree);
cleanup:
for (i = 0; i < ARRAY_SIZE(dirs); i++)
@@ -2286,6 +2352,9 @@ cleanup:
static NV_STATUS entry_test_blackwell(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
// We use entry_test_ampere() because we only want to check for an
// additional page size, no MMU page table format changes between Hopper and
// Blackwell.
return entry_test_ampere(gpu, entry_test_page_size_blackwell);
}
@@ -2523,6 +2592,15 @@ static NV_STATUS fake_gpu_init_volta(uvm_gpu_t *fake_gpu)
fake_gpu);
}
static NV_STATUS fake_gpu_init_turing(uvm_gpu_t *fake_gpu)
{
return fake_gpu_init(TURING_CHANNEL_GPFIFO_A,
TURING_DMA_COPY_A,
NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
0,
fake_gpu);
}
static NV_STATUS fake_gpu_init_ampere(uvm_gpu_t *fake_gpu)
{
return fake_gpu_init(AMPERE_CHANNEL_GPFIFO_A,
@@ -2641,6 +2719,15 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
return NV_OK;
}
static NV_STATUS turing_test_page_tree(uvm_gpu_t *turing)
{
TEST_CHECK_RET(fake_gpu_init_turing(turing) == NV_OK);
MEM_NV_CHECK_RET(entry_test_turing(turing, entry_test_page_size_turing), NV_OK);
return NV_OK;
}
static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
{
NvU32 i, tlb_batch_saved_max_pages;
@@ -2803,6 +2890,7 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(turing_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(hopper_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(blackwell_test_page_tree(gpu), done);


@@ -90,6 +90,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = false;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
@@ -104,5 +108,7 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}


@@ -297,7 +297,6 @@ static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
// vid address 32:8
pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID, address);
// peer id 35:33
if (aperture != UVM_APERTURE_VID)
pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID_PEER, UVM_APERTURE_PEER_ID(aperture));
@@ -306,6 +305,7 @@ static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
pte_bits |= HWVALUE64(_MMU_VER2, PTE, COMPTAGLINE, 0);
}
// kind 63:56
pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_PITCH);
return pte_bits;
@@ -327,7 +327,7 @@ static NvU64 make_sparse_pte_pascal(void)
HWCONST64(_MMU_VER2, PTE, VOL, TRUE);
}
static NvU64 poisoned_pte_pascal(void)
static NvU64 poisoned_pte_pascal(uvm_page_tree_t *tree)
{
// An invalid PTE won't be fatal from faultable units like SM, which is the
// most likely source of bad PTE accesses.
@@ -340,7 +340,7 @@ static NvU64 poisoned_pte_pascal(void)
// be aligned to page_size.
NvU64 phys_addr = 0x1bad000000ULL;
NvU64 pte_bits = make_pte_pascal(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU_VER2, PTE, PRIVILEGE, TRUE);
}


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -318,6 +318,60 @@ static void compute_prefetch_mask(uvm_va_block_region_t faulted_region,
}
}
// Determine whether prefetching should be applied for the given migration.
//
// This function evaluates multiple conditions to decide if prefetching is
// beneficial:
//
// 1. No preferred location policy: Always allow prefetching when no preferred
// location is set, as there are no policy constraints.
//
// 2. Moving to preferred location: Always allow prefetching when migrating
// toward the preferred location, as this aligns with the policy.
//
// 3. Confidential Computing exceptions: In CC environments, remote memory
// mapping is not always available, forcing memory migration. Allow
// prefetching out of the preferred location to facilitate these flows
// and improve performance:
// - DtoH transfers (migrating to CPU/sysmem)
// - HtoD transfers when pages are already resident on CPU
//
// Returns true if prefetching logic should be applied, false otherwise.
static bool should_apply_prefetch_logic(const uvm_va_policy_t *policy,
uvm_processor_id_t new_residency,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
const uvm_page_mask_t *faulted_pages)
{
// No preferred location set - always allow prefetching
if (!UVM_ID_IS_VALID(policy->preferred_location))
return true;
// Moving to preferred location - always allow prefetching
if (uvm_id_equal(policy->preferred_location, new_residency))
return true;
// CC sysmem exception logic - allow prefetching out of preferred location
// for CC-related sysmem transfers when remote mapping is not available
if (!g_uvm_global.conf_computing_enabled)
return false;
// DtoH: migrating to CPU/sysmem
if (UVM_ID_IS_CPU(new_residency))
return true;
// HtoD: check if any faulted pages are currently resident on CPU
if (UVM_ID_IS_GPU(new_residency)) {
const uvm_page_mask_t *cpu_resident_mask = uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE);
if (cpu_resident_mask && uvm_page_mask_intersects(faulted_pages, cpu_resident_mask))
return true;
}
// PPCIE, MPT CC (P2P access) can do remote mappings - no prefetching
// needed
return false;
}
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
@@ -358,13 +412,16 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency, NUMA_NO_NODE);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
// - If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
// - Do not prefetch pages out of the preferred location (policy location
// is valid and does not match the new residency), unless confidential
// computing is enabled.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
}
else {
else if (should_apply_prefetch_logic(policy, new_residency, va_block, va_block_context, faulted_pages)) {
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, resident_mask, faulted_pages);
update_bitmap_tree_from_va_block(bitmap_tree,


@@ -197,6 +197,8 @@ typedef struct
unsigned max_resets;
NvU64 pin_ns;
NvS8 lapse_stat;
} params;
uvm_va_space_t *va_space;
@@ -262,13 +264,22 @@ static unsigned uvm_perf_thrashing_pin_threshold = UVM_PERF_THRASHING_PIN_THRESH
// TODO: Bug 1768615: [uvm] Automatically tune default values for thrashing
// detection/prevention parameters
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500ULL
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 800)
#define UVM_PERF_THRASHING_LAPSE_USEC_MAX (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 1000)
#define UVM_PERF_THRASHING_LAPSE_USEC_MIN (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT / 100)
// Lapse of time in microseconds that determines if two consecutive events on
// the same page can be considered thrashing
static unsigned uvm_perf_thrashing_lapse_usec = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT;
#define UVM_PERF_LAPSE_VOTE_THRESHOLD 32
// Number of lapse intervals greater than uvm_perf_thrashing_lapse_usec,
// big enough to consider readjusting.
static unsigned int uvm_perf_lapse_vote_threshold = UVM_PERF_LAPSE_VOTE_THRESHOLD;
#define UVM_PERF_THRASHING_NAP_DEFAULT 1
#define UVM_PERF_THRASHING_NAP_MAX 100
@@ -309,6 +320,7 @@ module_param(uvm_perf_thrashing_enable, uint, S_IRUGO);
module_param(uvm_perf_thrashing_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_lapse_usec, uint, S_IRUGO);
module_param(uvm_perf_lapse_vote_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_nap, uint, S_IRUGO);
module_param(uvm_perf_thrashing_epoch, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin, uint, S_IRUGO);
@@ -324,6 +336,7 @@ static bool g_uvm_perf_thrashing_enable;
static unsigned g_uvm_perf_thrashing_threshold;
static unsigned g_uvm_perf_thrashing_pin_threshold;
static NvU64 g_uvm_perf_thrashing_lapse_usec;
static unsigned g_uvm_perf_lapse_vote_threshold;
static NvU64 g_uvm_perf_thrashing_nap;
static NvU64 g_uvm_perf_thrashing_epoch;
static NvU64 g_uvm_perf_thrashing_pin;
@@ -1607,6 +1620,29 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
return hint;
}
static void adjust_thrashing_lapse(va_space_thrashing_info_t *ti, NvU64 lapse)
{
// If lapse is non-default, i.e. provided by user explicitly, don't adjust it
if (g_uvm_perf_thrashing_lapse_usec != UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT)
return;
// Update statistics without if-else conditionals.
ti->params.lapse_stat += 2 * !(lapse < ti->params.lapse_ns) - 1;
// Voting capped exponential adjustment
if (ti->params.lapse_stat >= g_uvm_perf_lapse_vote_threshold &&
ti->params.lapse_ns < UVM_PERF_THRASHING_LAPSE_USEC_MAX * 1000)
ti->params.lapse_ns += min(ti->params.lapse_ns / 8, UVM_PERF_THRASHING_LAPSE_USEC_MAX / 10 * 1000);
else
if (-ti->params.lapse_stat <= -(int)g_uvm_perf_lapse_vote_threshold &&
ti->params.lapse_ns > UVM_PERF_THRASHING_LAPSE_USEC_MIN * 1000)
ti->params.lapse_ns -= max(ti->params.lapse_ns / 8, UVM_PERF_THRASHING_LAPSE_USEC_MIN * 1000);
else
return;
ti->params.lapse_stat /= 2;
}
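To make the capped exponential voting above easier to follow, here is a small standalone user-space model of the same arithmetic. It is an illustration only, not driver code: the constants mirror UVM_PERF_THRASHING_LAPSE_USEC_* and UVM_PERF_LAPSE_VOTE_THRESHOLD from this file, the driver's early-out for user-overridden module parameters is dropped, and the two vote comparisons are written in their plain form.

/*
 * Standalone sketch of the voting, capped exponential lapse adjustment:
 * +1 vote when an observed lapse is at least the current threshold, -1
 * otherwise; once 32 votes accumulate in either direction the threshold
 * moves by lapse/8 (capped on the way up, floored on the way down) and the
 * vote count decays by half.
 */
#include <stdio.h>
#include <stdint.h>

#define LAPSE_USEC_DEFAULT 500ULL
#define LAPSE_USEC_MAX     (LAPSE_USEC_DEFAULT * 1000)   /* 500000 us */
#define LAPSE_USEC_MIN     (LAPSE_USEC_DEFAULT / 100)    /* 5 us */
#define VOTE_THRESHOLD     32

static uint64_t lapse_ns = LAPSE_USEC_DEFAULT * 1000;    /* current threshold */
static int lapse_stat;                                   /* accumulated votes */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }
static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

static void adjust(uint64_t observed_lapse_ns)
{
    /* Same branchless vote update as the driver: +1 if lapse >= threshold. */
    lapse_stat += 2 * !(observed_lapse_ns < lapse_ns) - 1;

    if (lapse_stat >= VOTE_THRESHOLD && lapse_ns < LAPSE_USEC_MAX * 1000)
        lapse_ns += min_u64(lapse_ns / 8, LAPSE_USEC_MAX / 10 * 1000);
    else if (lapse_stat <= -VOTE_THRESHOLD && lapse_ns > LAPSE_USEC_MIN * 1000)
        lapse_ns -= max_u64(lapse_ns / 8, LAPSE_USEC_MIN * 1000);
    else
        return;

    lapse_stat /= 2;   /* decay the vote count after an adjustment */
}

int main(void)
{
    int i;

    /* 32 consecutive "lapse was at least the threshold" votes grow the
     * threshold by lapse/8: 500000 ns -> 562500 ns. */
    for (i = 0; i < 32; i++)
        adjust(600 * 1000);
    printf("after long lapses:  %llu ns\n", (unsigned long long)lapse_ns);

    /* Enough short-lapse votes eventually shrink it again. */
    for (i = 0; i < 64; i++)
        adjust(10 * 1000);
    printf("after short lapses: %llu ns\n", (unsigned long long)lapse_ns);

    return 0;
}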
// Function called on fault that tells the fault handler if any operation
// should be performed to minimize thrashing. The logic is as follows:
//
@@ -1710,6 +1746,8 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);
adjust_thrashing_lapse(va_space_thrashing, time_stamp - last_time_stamp);
// If the lapse since the last thrashing event is longer than a thrashing
// lapse we are no longer thrashing
if (time_stamp - last_time_stamp > va_space_thrashing->params.lapse_ns &&
@@ -2012,6 +2050,8 @@ NV_STATUS uvm_perf_thrashing_init(void)
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_lapse_vote_threshold, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
UVM_PERF_THRASHING_NAP_DEFAULT,
UVM_PERF_THRASHING_NAP_MAX);


@@ -98,14 +98,16 @@
//
// When a memory allocation from PMA fails and eviction is requested, PMM will
// check whether it can evict any user memory chunks to satisfy the request.
// All allocated user memory root chunks are tracked in an LRU list
// (root_chunks.va_block_used). A root chunk is moved to the tail of that list
// whenever any of its subchunks is allocated (unpinned) by a VA block (see
// uvm_pmm_gpu_unpin_allocated()). When a root chunk is selected for eviction,
// it has the eviction flag set (see pick_root_chunk_to_evict()). This flag
// affects many of the PMM operations on all of the subchunks of the root chunk
// being evicted. See usage of (root_)chunk_is_in_eviction(), in particular in
// chunk_free_locked() and claim_free_chunk().
// All allocated user memory root chunks are tracked in one of several LRU lists
// (root_chunks.alloc_list[n]). The list used depends on the state of the chunk
// (see uvm_pmm_alloc_list_t). A root chunk is moved to the tail of the used
// list (UVM_PMM_ALLOC_LIST_USED) whenever any of its subchunks is allocated
// (unpinned) by a VA block (see uvm_pmm_gpu_unpin_allocated()). When a root
// chunk is selected for eviction, it has the eviction flag set
// (see pick_root_chunk_to_evict()). This flag affects many of the PMM
// operations on all of the subchunks of the root chunk being evicted. See usage
// of (root_)chunk_is_in_eviction(), in particular in chunk_free_locked() and
// claim_free_chunk().
//
// To evict a root chunk, all of its free subchunks are pinned, then all
// resident pages backed by it are moved to the CPU one VA block at a time.
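To make the new list organization above easier to follow: the identifiers used in the rest of this file (UVM_PMM_ALLOC_LIST_USED/UNUSED/DISCARDED/COUNT) imply an enum roughly like the sketch below. The exact declaration is not part of this compare; the ordering shown is only an assumption, chosen to be consistent with the eviction priority that pick_root_chunk_to_evict() used to encode explicitly (unused first, then discarded, then used) and that get_first_allocated_chunk() now gets implicitly by walking the lists in enum order.

/* Hypothetical reconstruction, not taken from this diff: the array index
 * type for root_chunks.alloc_list[]. Ordering assumed so that walking the
 * lists from 0 upward preserves the old eviction priority. */
typedef enum
{
    UVM_PMM_ALLOC_LIST_UNUSED = 0,   /* evicted first */
    UVM_PMM_ALLOC_LIST_DISCARDED,    /* expected to sometimes revert to used */
    UVM_PMM_ALLOC_LIST_USED,         /* LRU of chunks with allocated subchunks */
    UVM_PMM_ALLOC_LIST_COUNT,
} uvm_pmm_alloc_list_t;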
@@ -645,7 +647,7 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk
else if (root_chunk->chunk.state != UVM_PMM_GPU_CHUNK_STATE_FREE) {
UVM_ASSERT(root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT ||
root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
list_move_tail(&root_chunk->chunk.list, &pmm->root_chunks.va_block_used);
list_move_tail(&root_chunk->chunk.list, &pmm->root_chunks.alloc_list[UVM_PMM_ALLOC_LIST_USED]);
}
}
@@ -663,7 +665,6 @@ void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm
UVM_ASSERT(list_empty(&chunk->list));
UVM_ASSERT(va_block);
UVM_ASSERT(chunk->va_block == va_block);
UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));
uvm_spin_lock(&pmm->list_lock);
@@ -757,12 +758,8 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(child->state == first_child->state);
if ((first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(first_child)) {
uvm_gpu_chunk_t *prev_child = chunk->suballoc->subchunks[i-1];
UVM_ASSERT(child->va_block == child_va_block);
UVM_ASSERT(child->va_block_page_index ==
prev_child->va_block_page_index + uvm_gpu_chunk_get_size(prev_child) / PAGE_SIZE);
UVM_ASSERT(child->is_referenced == prev_child->is_referenced);
UVM_ASSERT(child->is_referenced == first_child->is_referenced);
}
}
@@ -798,7 +795,6 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(subchunk->va_block);
chunk->va_block = subchunk->va_block;
chunk->va_block_page_index = subchunk->va_block_page_index;
chunk->is_referenced = subchunk->is_referenced;
}
else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
@@ -1198,7 +1194,6 @@ void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(chunk->va_block);
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk_pin(pmm, chunk);
uvm_spin_unlock(&pmm->list_lock);
@@ -1412,7 +1407,7 @@ static void chunk_start_eviction(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
uvm_gpu_chunk_set_in_eviction(chunk, true);
}
static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, struct list_head *list)
static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_alloc_list_t alloc_list)
{
uvm_spin_lock(&pmm->list_lock);
@@ -1426,7 +1421,7 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
// eviction lists.
UVM_ASSERT(!list_empty(&chunk->list));
list_move_tail(&chunk->list, list);
list_move_tail(&chunk->list, &pmm->root_chunks.alloc_list[alloc_list]);
}
uvm_spin_unlock(&pmm->list_lock);
@@ -1434,17 +1429,49 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
void uvm_pmm_gpu_mark_root_chunk_used(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_used);
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_USED);
}
void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_unused);
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_UNUSED);
}
void uvm_pmm_gpu_mark_root_chunk_discarded(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_discarded);
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_DISCARDED);
}
static uvm_pmm_alloc_list_t get_alloc_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_pmm_alloc_list_t alloc_list;
uvm_assert_spinlock_locked(&pmm->list_lock);
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
uvm_gpu_chunk_t *entry;
list_for_each_entry(entry, &pmm->root_chunks.alloc_list[alloc_list], list) {
if (entry == chunk)
return alloc_list;
}
}
return UVM_PMM_ALLOC_LIST_COUNT;
}
static uvm_gpu_chunk_t *get_first_allocated_chunk(uvm_pmm_gpu_t *pmm)
{
uvm_pmm_alloc_list_t alloc_list;
uvm_assert_spinlock_locked(&pmm->list_lock);
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
uvm_gpu_chunk_t *chunk = list_first_chunk(&pmm->root_chunks.alloc_list[alloc_list]);
if (chunk)
return chunk;
}
return NULL;
}
static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
@@ -1471,19 +1498,10 @@ static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
UVM_ASSERT(chunk->is_zero);
}
if (!chunk)
chunk = list_first_chunk(&pmm->root_chunks.va_block_unused);
if (!chunk) {
// Discarded pages are chosen to be evicted after unused pages,
// as we expect some of them to get reverted to used pages.
chunk = list_first_chunk(&pmm->root_chunks.va_block_discarded);
}
// TODO: Bug 1765193: Move the chunks to the tail of the used list whenever
// they get mapped.
if (!chunk)
chunk = list_first_chunk(&pmm->root_chunks.va_block_used);
chunk = get_first_allocated_chunk(pmm);
if (chunk)
chunk_start_eviction(pmm, chunk);
@@ -1492,6 +1510,7 @@ static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
if (chunk)
return root_chunk_from_chunk(pmm, chunk);
return NULL;
}
@@ -1867,7 +1886,6 @@ static void init_root_chunk(uvm_pmm_gpu_t *pmm,
UVM_ASSERT(!chunk->parent);
UVM_ASSERT(!chunk->suballoc);
UVM_ASSERT(!chunk->va_block);
UVM_ASSERT(chunk->va_block_page_index == PAGES_PER_UVM_VA_BLOCK);
UVM_ASSERT(list_empty(&chunk->list));
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);
UVM_ASSERT(!root_chunk_has_elevated_page(pmm, root_chunk));
@@ -2109,7 +2127,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
subchunk->type = chunk->type;
uvm_gpu_chunk_set_size(subchunk, subchunk_size);
subchunk->parent = chunk;
subchunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
subchunk->is_zero = chunk->is_zero;
INIT_LIST_HEAD(&subchunk->list);
@@ -2121,7 +2138,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
uvm_assert_mutex_locked(&chunk->va_block->lock);
subchunk->va_block = chunk->va_block;
subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
subchunk->is_referenced = chunk->is_referenced;
}
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
@@ -2140,7 +2156,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
}
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
@@ -2154,7 +2169,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
root_chunk->chunk.suballoc->pinned_leaf_chunks += 1;
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
}
@@ -2243,7 +2257,6 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(chunk->va_block);
UVM_ASSERT(list_empty(&chunk->list));
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;
chunk_pin(pmm, chunk);
}
@@ -2261,7 +2274,6 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
}
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED)
@@ -2913,134 +2925,6 @@ cleanup:
return status;
}
typedef struct
{
// Start/end of the physical region to be traversed (IN)
NvU64 phys_start;
NvU64 phys_end;
// Pointer to the array of mappings in which to store the results (OUT)
uvm_reverse_map_t *mappings;
// Number of entries written to mappings (OUT)
NvU32 num_mappings;
} get_chunk_mappings_data_t;
// Chunk traversal function used for phys-to-virt translation. These are the
// possible return values.
//
// - NV_ERR_OUT_OF_RANGE: no allocated physical chunks were found
// - NV_ERR_MORE_DATA_AVAILABLE: allocated physical chunks were found
// - NV_OK: allocated physical chunks may have been found. Check num_mappings
static NV_STATUS get_chunk_mappings_in_range(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, void *data)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
get_chunk_mappings_data_t *get_chunk_mappings_data = (get_chunk_mappings_data_t *)data;
NvU64 chunk_end = chunk->address + uvm_gpu_chunk_get_size(chunk) - 1;
uvm_assert_mutex_locked(&pmm->lock);
// Kernel chunks do not have assigned VA blocks so we can just skip them
if (chunk->type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL)
return NV_WARN_NOTHING_TO_DO;
// This chunk is located before the requested physical range. Skip its
// children and keep going
if (chunk_end < get_chunk_mappings_data->phys_start)
return NV_WARN_NOTHING_TO_DO;
// We are beyond the search phys range. Stop traversing.
if (chunk->address > get_chunk_mappings_data->phys_end) {
if (get_chunk_mappings_data->num_mappings > 0)
return NV_ERR_MORE_DATA_AVAILABLE;
else
return NV_ERR_OUT_OF_RANGE;
}
uvm_spin_lock(&pmm->list_lock);
// Return results for allocated leaf chunks only
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
uvm_reverse_map_t *reverse_map;
UVM_ASSERT(chunk->va_block);
uvm_va_block_retain(chunk->va_block);
reverse_map = &get_chunk_mappings_data->mappings[get_chunk_mappings_data->num_mappings];
reverse_map->va_block = chunk->va_block;
reverse_map->region = uvm_va_block_region(chunk->va_block_page_index,
chunk->va_block_page_index + uvm_gpu_chunk_get_size(chunk) / PAGE_SIZE);
reverse_map->owner = gpu->id;
// If we land in the middle of a chunk, adjust the offset
if (get_chunk_mappings_data->phys_start > chunk->address) {
NvU64 offset = get_chunk_mappings_data->phys_start - chunk->address;
reverse_map->region.first += offset / PAGE_SIZE;
}
// If the physical range doesn't cover the whole chunk, adjust num_pages
if (get_chunk_mappings_data->phys_end < chunk_end)
reverse_map->region.outer -= (chunk_end - get_chunk_mappings_data->phys_end) / PAGE_SIZE;
++get_chunk_mappings_data->num_mappings;
}
uvm_spin_unlock(&pmm->list_lock);
return NV_OK;
}
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings)
{
NvU64 chunk_base_addr = UVM_ALIGN_DOWN(phys_addr, UVM_CHUNK_SIZE_MAX);
NvU64 size_in_chunk = min(UVM_CHUNK_SIZE_MAX - (phys_addr - chunk_base_addr), region_size);
NvU32 num_mappings = 0;
UVM_ASSERT(PAGE_ALIGNED(phys_addr));
UVM_ASSERT(PAGE_ALIGNED(region_size));
uvm_mutex_lock(&pmm->lock);
// Traverse the whole requested region
do {
NV_STATUS status = NV_OK;
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_address(pmm, phys_addr);
uvm_gpu_chunk_t *chunk = &root_chunk->chunk;
get_chunk_mappings_data_t get_chunk_mappings_data;
get_chunk_mappings_data.phys_start = phys_addr;
get_chunk_mappings_data.phys_end = phys_addr + size_in_chunk - 1;
get_chunk_mappings_data.mappings = out_mappings + num_mappings;
get_chunk_mappings_data.num_mappings = 0;
// Walk the chunks for the current root chunk
status = chunk_walk_pre_order(pmm,
chunk,
get_chunk_mappings_in_range,
&get_chunk_mappings_data);
if (status == NV_ERR_OUT_OF_RANGE)
break;
if (get_chunk_mappings_data.num_mappings > 0) {
UVM_ASSERT(status == NV_OK || status == NV_ERR_MORE_DATA_AVAILABLE);
num_mappings += get_chunk_mappings_data.num_mappings;
}
else {
UVM_ASSERT(status == NV_OK);
}
region_size -= size_in_chunk;
phys_addr += size_in_chunk;
size_in_chunk = min((NvU64)UVM_CHUNK_SIZE_MAX, region_size);
} while (region_size > 0);
uvm_mutex_unlock(&pmm->lock);
return num_mappings;
}
#if UVM_IS_CONFIG_HMM()
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page)
@@ -3139,7 +3023,6 @@ static void devmem_page_free(struct page *page)
UVM_ASSERT(chunk->is_referenced);
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
@@ -3477,12 +3360,10 @@ void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
struct page *p2p_page;
if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) {
p2p_page = pfn_to_page(pci_start_pfn);
struct page *p2p_page = pfn_to_page(pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT);
devm_memunmap_pages(&parent_gpu->pci_dev->dev, page_pgmap(p2p_page));
}
@@ -3546,6 +3427,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
{ 0, uvm_mem_kernel_chunk_sizes(gpu)},
};
NV_STATUS status = NV_OK;
uvm_pmm_alloc_list_t alloc_list;
size_t i, j, k;
// UVM_CHUNK_SIZE_INVALID is UVM_CHUNK_SIZE_MAX shifted left by 1. This
@@ -3560,10 +3442,11 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
INIT_LIST_HEAD(&pmm->free_list[i][j][k]);
}
}
INIT_LIST_HEAD(&pmm->root_chunks.va_block_used);
INIT_LIST_HEAD(&pmm->root_chunks.va_block_unused);
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++)
INIT_LIST_HEAD(&pmm->root_chunks.alloc_list[alloc_list]);
INIT_LIST_HEAD(&pmm->root_chunks.va_block_lazy_free);
INIT_LIST_HEAD(&pmm->root_chunks.va_block_discarded);
nv_kthread_q_item_init(&pmm->root_chunks.va_block_lazy_free_q_item, process_lazy_free_entry, pmm);
uvm_mutex_init(&pmm->lock, UVM_LOCK_ORDER_PMM);
@@ -3620,7 +3503,6 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
chunk->state = UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED;
uvm_gpu_chunk_set_size(chunk, UVM_CHUNK_SIZE_MAX);
chunk->address = i * UVM_CHUNK_SIZE_MAX;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
}
status = uvm_bit_locks_init(&pmm->root_chunks.bitlocks, pmm->root_chunks.count, UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
@@ -4073,3 +3955,55 @@ NV_STATUS uvm_test_pmm_query_pma_stats(UVM_TEST_PMM_QUERY_PMA_STATS_PARAMS *para
uvm_gpu_release(gpu);
return NV_OK;
}
NV_STATUS uvm_test_pmm_get_alloc_list(UVM_TEST_PMM_GET_ALLOC_LIST_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu;
uvm_va_block_t *va_block;
uvm_gpu_chunk_t *chunk;
NV_STATUS status = NV_OK;
// -Wall implies -Wenum-compare, so cast through int to avoid warnings
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_UNUSED != (int)UVM_PMM_ALLOC_LIST_UNUSED);
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_DISCARDED != (int)UVM_PMM_ALLOC_LIST_DISCARDED);
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_USED != (int)UVM_PMM_ALLOC_LIST_USED);
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_COUNT != (int)UVM_PMM_ALLOC_LIST_COUNT);
uvm_va_space_down_read(va_space);
gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu) {
status = NV_ERR_INVALID_DEVICE;
goto out;
}
status = uvm_va_block_find(va_space, params->address, &va_block);
if (status != NV_OK)
goto out;
// Default to failure: no chunk found, or the chunk is not on any alloc list
status = NV_ERR_INVALID_STATE;
uvm_mutex_lock(&va_block->lock);
chunk = uvm_va_block_lookup_gpu_chunk(va_block, gpu, params->address);
if (chunk) {
uvm_pmm_alloc_list_t alloc_list;
uvm_spin_lock(&gpu->pmm.list_lock);
alloc_list = get_alloc_list(&gpu->pmm, chunk);
uvm_spin_unlock(&gpu->pmm.list_lock);
if (alloc_list != UVM_PMM_ALLOC_LIST_COUNT) {
params->list_type = alloc_list;
status = NV_OK;
}
}
uvm_mutex_unlock(&va_block->lock);
out:
uvm_va_space_up_read(va_space);
return status;
}

View File

@@ -174,6 +174,34 @@ static void uvm_pmm_list_zero_checks(void)
BUILD_BUG_ON(UVM_PMM_LIST_ZERO_COUNT > 2);
}
// Lists for allocated root chunks. When picking a root chunk to evict, lists
// with lower enum values are checked first.
typedef enum
{
// Root chunks unused by VA blocks, i.e. allocated, but not holding any
// resident pages. These take priority when evicting as no data needs to be
// migrated for them to be evicted.
//
// For simplicity, the list is approximate, tracking unused chunks only from
// root chunk sized (2M) VA blocks.
//
// Updated by the VA block code with uvm_pmm_gpu_mark_root_chunk_(un)used().
UVM_PMM_ALLOC_LIST_UNUSED,
// Discarded root GPU chunks, which are still resident on the GPU. Chunks on
// this list are evicted with a lower priority than unused chunks because we
// expect some of them to get reverted to used pages.
//
// Updated by the VA block code with
// uvm_pmm_gpu_mark_root_chunk_discarded().
UVM_PMM_ALLOC_LIST_DISCARDED,
// Root chunks used by VA blocks, likely with resident pages.
UVM_PMM_ALLOC_LIST_USED,
UVM_PMM_ALLOC_LIST_COUNT
} uvm_pmm_alloc_list_t;
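A hedged sketch of how the VA block code might drive transitions between these lists follows. The resident_pages and discard_pending inputs are hypothetical stand-ins for the VA block's real residency state; only the three uvm_pmm_gpu_mark_root_chunk_*() calls are actual PMM entry points (defined in uvm_pmm_gpu.c), and the snippet assumes it sits in a .c file where uvm_pmm_gpu_t and uvm_gpu_chunk_t are in scope.

// Hedged sketch: the condition inputs are hypothetical; the real driver
// derives this state from the VA block's resident page masks.
static void demo_update_root_chunk_list(uvm_pmm_gpu_t *pmm,
                                        uvm_gpu_chunk_t *root_chunk,
                                        size_t resident_pages,
                                        bool discard_pending)
{
    if (resident_pages == 0) {
        // Nothing resident: cheapest to evict, so place on the UNUSED list.
        uvm_pmm_gpu_mark_root_chunk_unused(pmm, root_chunk);
    }
    else if (discard_pending) {
        // Still resident but discarded: evicted before used chunks.
        uvm_pmm_gpu_mark_root_chunk_discarded(pmm, root_chunk);
    }
    else {
        // Actively holding resident pages: evicted only as a last resort.
        uvm_pmm_gpu_mark_root_chunk_used(pmm, root_chunk);
    }
}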
// Maximum chunk sizes per type of allocation in single GPU.
// The worst case today is Maxwell with 4 allocations sizes for page tables and
// 2 page sizes used by uvm_mem_t. Notably one of the allocations for page
@@ -277,7 +305,7 @@ struct uvm_gpu_chunk_struct
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
// Start page index within va_block
// Start page index within va_block.
uvm_page_index_t va_block_page_index : order_base_2(PAGES_PER_UVM_VA_BLOCK + 1);
// This allows determining what PMM owns the chunk. Users of this field
@@ -348,30 +376,12 @@ typedef struct uvm_pmm_gpu_struct
// Bit locks for the root chunks with 1 bit per each root chunk
uvm_bit_locks_t bitlocks;
// List of root chunks unused by VA blocks, i.e. allocated, but not
// holding any resident pages. These take priority when evicting as no
// data needs to be migrated for them to be evicted.
//
// For simplicity, the list is approximate, tracking unused chunks only
// from root chunk sized (2M) VA blocks.
//
// Updated by the VA block code with
// uvm_pmm_gpu_mark_root_chunk_(un)used().
struct list_head va_block_unused;
// List of discarded root GPU chunks, which are still mapped on the GPU.
// Chunks on this list are evicted with a lower priority than unused chunks.
//
// Updated by the VA block code with
// uvm_pmm_gpu_mark_root_chunk_discarded().
struct list_head va_block_discarded;
// List of root chunks used by VA blocks
struct list_head va_block_used;
// LRU lists for picking which root chunks to evict
struct list_head alloc_list[UVM_PMM_ALLOC_LIST_COUNT];
// List of chunks needing to be lazily freed and a queue for processing
// the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t
// or workqueue.
// the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or
// workqueue.
struct list_head va_block_lazy_free;
nv_kthread_q_item_t va_block_lazy_free_q_item;
} root_chunks;
@@ -612,21 +622,6 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}
// Obtain the {va_block, virt_addr} information for the chunks in the given
// [phys_addr:phys_addr + region_size) range. One entry per chunk is returned.
// phys_addr and region_size must be page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). The caller is required to provide enough entries in out_mappings for the
// whole region. The function returns the number of entries written to
// out_mappings.
//
// The returned reverse map is a snapshot: it is stale as soon as it is
// returned, and the caller is responsible for locking the VA block(s) and
// checking that the chunks are still there. Also, the VA block(s) are
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes) \
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \
@@ -653,4 +648,6 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
for (; (__size) != UVM_CHUNK_SIZE_INVALID; \
(__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))
NV_STATUS uvm_test_pmm_get_alloc_list(UVM_TEST_PMM_GET_ALLOC_LIST_PARAMS *params, struct file *filp);
#endif

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -1009,166 +1009,6 @@ NV_STATUS uvm_test_pmm_async_alloc(UVM_TEST_PMM_ASYNC_ALLOC_PARAMS *params, stru
return status;
}
static uvm_reverse_map_t g_reverse_map_entries[PAGES_PER_UVM_VA_BLOCK * 4];
static NV_STATUS test_pmm_reverse_map_single(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status = NV_OK;
NvU32 num_translations;
uvm_va_block_t *va_block;
uvm_gpu_phys_address_t phys_addr;
bool is_resident;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
// Verify that all pages are populated on the GPU
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_processor_mask_test(&va_block->resident, gpu->id) &&
uvm_page_mask_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE));
if (is_resident)
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(is_resident);
// In this test a single VA range covers the whole 2MB physical region. We
// expect a single translation to be returned for a 2MB chunk.
num_translations = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, UVM_VA_BLOCK_SIZE, g_reverse_map_entries);
TEST_CHECK_RET(num_translations == 1);
TEST_CHECK_RET(g_reverse_map_entries[0].va_block == va_block);
TEST_CHECK_RET(g_reverse_map_entries[0].region.first == 0);
TEST_CHECK_RET(uvm_va_block_region_num_pages(g_reverse_map_entries[0].region) == uvm_va_block_num_cpu_pages(va_block));
uvm_va_block_release(va_block);
return NV_OK;
}
static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr, NvU64 size)
{
uvm_va_range_managed_t *managed_range;
uvm_va_block_t *va_block = NULL;
uvm_va_block_context_t *va_block_context = NULL;
NvU32 num_blocks;
NvU32 index = 0;
uvm_gpu_phys_address_t phys_addr = {0};
bool is_resident;
// In this test, the [addr:addr + size) VA region contains
// several managed ranges with different sizes.
// Find the first block to compute the base physical address of the root
// chunk
uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
va_block = uvm_va_range_block(managed_range, 0);
if (va_block)
break;
}
TEST_CHECK_RET(va_block);
va_block_context = uvm_va_block_context_alloc(NULL);
TEST_CHECK_RET(va_block_context);
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
if (is_resident) {
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
}
uvm_mutex_unlock(&va_block->lock);
uvm_va_block_context_free(va_block_context);
TEST_CHECK_RET(is_resident);
// Perform the lookup for the whole root chunk
num_blocks = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, size, g_reverse_map_entries);
TEST_CHECK_RET(num_blocks != 0);
// Iterate over all managed ranges and their VA blocks within the 2MB VA
// region. Some blocks are not populated. However, we assume that blocks
// have been populated in order so they have been assigned physical
// addresses incrementally. Therefore, the reverse translations will show
// them in order.
uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
for_each_va_block_in_va_range(managed_range, va_block) {
NvU32 num_va_block_pages = 0;
// Iterate over all the translations for the current VA block. One
// translation per chunk is returned. We compute the total number of
// pages covered in the translations to check that it matches
// the number of pages in the VA block.
while (g_reverse_map_entries[index].va_block == va_block) {
uvm_reverse_map_t *reverse_mapping;
reverse_mapping = &g_reverse_map_entries[index];
uvm_va_block_release(va_block);
num_va_block_pages += uvm_va_block_region_num_pages(reverse_mapping->region);
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_mapping)));
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_mapping)));
uvm_mutex_lock(&va_block->lock);
// Verify that all pages are populated on the GPU
is_resident = uvm_page_mask_region_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE),
reverse_mapping->region);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(is_resident);
++index;
}
if (num_va_block_pages)
TEST_CHECK_RET(num_va_block_pages == uvm_va_block_num_cpu_pages(va_block));
}
}
TEST_CHECK_RET(index == num_blocks);
return NV_OK;
}
NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_gpu_t *gpu;
uvm_va_space_t *va_space;
va_space = uvm_va_space_get(filp);
// Take the global lock to avoid interference from different instances of
// the test, since we use global variables
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_write(va_space);
gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
status = NV_ERR_INVALID_DEVICE;
goto exit_unlock;
}
status = test_pmm_reverse_map_single(gpu, va_space, params->range_address1);
if (status == NV_OK)
status = test_pmm_reverse_map_many_blocks(gpu, va_space, params->range_address2, params->range_size2);
exit_unlock:
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS test_chunk_with_elevated_page(uvm_gpu_t *gpu)
{
uvm_pmm_gpu_t *pmm = &gpu->pmm;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2018 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -35,17 +35,22 @@
#if defined(CONFIG_PROC_FS)
// This parameter enables additional debug procfs entries. It's enabled by
// default for debug and develop builds and disabled for release builds.
int uvm_enable_debug_procfs = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
static int uvm_enable_debug_procfs = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
module_param(uvm_enable_debug_procfs, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_debug_procfs, "Enable debug procfs entries in /proc/" UVM_PROC_DIR_NAME);
#else
int uvm_enable_debug_procfs = 0;
static int uvm_enable_debug_procfs = 0;
#endif
static struct proc_dir_entry *uvm_proc_dir;
static struct proc_dir_entry *uvm_proc_gpus;
static struct proc_dir_entry *uvm_proc_cpu;
bool uvm_procfs_is_debug_enabled(void)
{
return uvm_enable_debug_procfs != 0;
}
NV_STATUS uvm_procfs_init(void)
{
if (!uvm_procfs_is_enabled())
@@ -80,4 +85,3 @@ struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void)
{
return uvm_proc_cpu;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -45,10 +45,7 @@ static bool uvm_procfs_is_enabled(void)
// Is debug procfs enabled? This indicates that debug procfs files should be
// created.
static bool uvm_procfs_is_debug_enabled(void)
{
return uvm_enable_debug_procfs != 0;
}
bool uvm_procfs_is_debug_enabled(void);
struct proc_dir_entry *uvm_procfs_get_gpu_base_dir(void);
struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void);
@@ -71,8 +68,8 @@ void uvm_procfs_close_callback(void);
// Defer PM lock acquisition until the respective read() callback
// is invoked, to ensure the lock is acquired and released by
// the same thread. Else the lock tracking validation code must
// be disabled for this lock, which is undesirable. As a result,
// lockless macro is used below. See bug 2594854 for additional
// information.
#define UVM_DEFINE_SINGLE_PROCFS_FILE(name) \

Some files were not shown because too many files have changed in this diff.