Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-01-28 03:59:48 +00:00)

Compare commits (3 commits):
- 2ccbad25e1
- a5bfb10e75
- 2af9f1f0f7
README.md (11 lines changed)
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 580.94.11.
+version 590.48.01.
 
 
 ## How to Build
@@ -17,7 +17,7 @@ as root:
 
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-580.94.11 driver release. This can be achieved by installing
+590.48.01 driver release. This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option. E.g.,
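(The hunk's context ends at "E.g.,". In the released README the sentence is followed by an indented install command, which for this version would presumably read `sh ./NVIDIA-Linux-x86_64-590.48.01.run --no-kernel-modules`; the exact .run file name is an inference from the version bump above.)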
@@ -185,7 +185,7 @@ table below).
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:
 
-https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.11/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/590.48.01/README/kernel_open.html
 
 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.
@@ -959,9 +959,13 @@ Subsystem Device ID.
 | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
 | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B |
 | NVIDIA RTX PRO 5000 Blackwell | 2BB3 1028 204D |
+| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 1028 227A |
 | NVIDIA RTX PRO 5000 Blackwell | 2BB3 103C 204D |
+| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 103C 227A |
 | NVIDIA RTX PRO 5000 Blackwell | 2BB3 10DE 204D |
+| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 10DE 227A |
 | NVIDIA RTX PRO 5000 Blackwell | 2BB3 17AA 204D |
+| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 17AA 227A |
 | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C |
 | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
 | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
@@ -1008,6 +1012,7 @@ Subsystem Device ID.
 | NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 |
 | NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 |
 | NVIDIA RTX PRO 500 Blackwell Embedded GPU | 2DF9 |
+| NVIDIA GB10 | 2E12 10DE 21EC |
 | NVIDIA GeForce RTX 5070 | 2F04 |
 | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
 | NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 |
---

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
 ccflags-y += -I$(src)
 ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
-ccflags-y += -DNV_VERSION_STRING=\"580.94.11\"
+ccflags-y += -DNV_VERSION_STRING=\"590.48.01\"
 
 # Include and link Tegra out-of-tree modules.
 ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
@@ -187,6 +187,7 @@ NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
 NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
 NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
+NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
 NV_CONFTEST_CFLAGS += $(call cc-option,-fms-extensions,)
 NV_CONFTEST_CFLAGS += -Wno-error
 
 NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
---

kernel-open/common/inc/controller/sbios_table_version.h (new file, 53 lines)
@@ -0,0 +1,53 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef SBIOS_TABLE_VERSION_H
#define SBIOS_TABLE_VERSION_H

#define CONTROLLER_SBIOS_TABLE_VERSION_10 (0x10)
#define CONTROLLER_SBIOS_TABLE_VERSION_20 (0x20)
#define CONTROLLER_SBIOS_TABLE_VERSION_21 (0x21)
#define CONTROLLER_SBIOS_TABLE_VERSION_22 (0x22)
#define CONTROLLER_SBIOS_TABLE_VERSION_23 (0x23)
#define CONTROLLER_SBIOS_TABLE_VERSION_24 (0x24)
#define CONTROLLER_SBIOS_TABLE_VERSION_25 (0x25)
#define CONTROLLER_SBIOS_TABLE_MAX_ENTRIES (8)

// NOTE: When adding a new version, make sure to update MAX_VERSION accordingly.
#define CONTROLLER_SBIOS_TABLE_MAX_VERSION (0x25)

/*!
 * Layout of Controller 2x data used for static config
 */
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_20 (0x20)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_21 (0x21)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22 (0x22)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23 (0x23)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24 (0x24)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25 (0x25)
#define NVPCF_CONTROLLER_STATIC_TABLE_MAX_ENTRIES (8)

// NOTE: When adding a new version, make sure to update MAX_VERSION accordingly.
#define NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION (0x25)

#endif // SBIOS_TABLE_VERSION_H
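The two NOTE comments carry the header's one invariant: each MAX_VERSION define must track the newest VERSION_xx define. A minimal sketch of how a consumer could lean on that invariant to reject unknown table versions (the helper below is hypothetical, not part of this change):

    #include "controller/sbios_table_version.h"

    /* Hypothetical validator: accept version 0x10, or any 2x version up to
     * CONTROLLER_SBIOS_TABLE_MAX_VERSION. Because MAX_VERSION is bumped
     * whenever a new VERSION_xx define is added, this check needs no edits
     * when the header grows a new version. */
    static int sbios_table_version_is_supported(unsigned int version)
    {
        if (version == CONTROLLER_SBIOS_TABLE_VERSION_10)
            return 1;
        return (version >= CONTROLLER_SBIOS_TABLE_VERSION_20) &&
               (version <= CONTROLLER_SBIOS_TABLE_MAX_VERSION);
    }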
---

@@ -24,7 +24,6 @@
 #define __NV_HASH_H__
 
 #include "conftest.h"
-#include "nv-list-helpers.h"
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/hash.h>
---

@@ -26,8 +26,10 @@
 #define NV_IOCTL_H
 
 #include <nv-ioctl-numbers.h>
 #include <nv-ioctl-numa.h>
 #include <nvtypes.h>
 
 typedef struct {
     NvU32 domain;       /* PCI domain number */
     NvU8  bus;          /* PCI bus number */
@@ -113,7 +115,7 @@ typedef struct nv_ioctl_query_device_intr
 {
     NvU32 intrStatus NV_ALIGN_BYTES(4);
     NvU32 status;
-} nv_ioctl_query_device_intr;
+} nv_ioctl_query_device_intr_t;
 
 /* system parameters that the kernel driver may use for configuration */
 typedef struct nv_ioctl_sys_params
---

@@ -102,17 +102,6 @@
 #include <linux/dma-buf.h>
 #endif
 
-#if defined(NV_DRM_AVAILABLE)
-#include <drm/drm_device.h>
-#include <drm/drm_drv.h>
-
-#if defined(NV_DRM_DRMP_H_PRESENT)
-#include <drm/drmP.h>
-#endif
-
-#include <drm/drm_gem.h>
-#endif /* NV_DRM_AVAILABLE */
-
 /* task and signal-related items */
 #include <linux/sched/signal.h>
 #include <linux/sched/task.h>
@@ -141,8 +130,6 @@
 #include <asm/bitops.h>         /* __set_bit() */
 #include <linux/time.h>         /* FD_SET() */
 
-#include "nv-list-helpers.h"
-
 /*
  * Use current->cred->euid, instead of calling current_euid().
  * The latter can pull in the GPL-only debug_lockdep_rcu_enabled()
@@ -274,14 +261,8 @@ extern int nv_pat_mode;
                           user_function, NULL, args)
 #endif
 
-#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
-#define NV_CONFIG_PREEMPT_RT 1
-#endif
-
-#define NV_PAGE_COUNT(page) \
-    ((unsigned int)page_count(page))
 #define NV_GET_PAGE_FLAGS(page_ptr) \
     (NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
 
 #if !defined(DEBUG) && defined(__GFP_NOWARN)
 #define NV_GFP_KERNEL (GFP_KERNEL | __GFP_NOWARN)
@@ -298,9 +279,9 @@ extern int nv_pat_mode;
  * such as Linux/x86-64; the alternative is to use an IOMMU such
  * as the one implemented with the K8 GART, if available.
 */
-#define NV_GFP_DMA32 (NV_GFP_KERNEL | GFP_DMA32)
+#define NV_GFP_DMA32 (GFP_DMA32)
 #else
-#define NV_GFP_DMA32 (NV_GFP_KERNEL)
+#define NV_GFP_DMA32 0
 #endif
 
 #if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
@@ -388,11 +369,7 @@ static inline void nv_vfree(void *ptr, NvU64 size)
 
 static inline void *nv_ioremap(NvU64 phys, NvU64 size)
 {
-#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
-    void *ptr = ioremap_driver_hardened(phys, size);
-#else
     void *ptr = ioremap(phys, size);
-#endif
     NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }
@@ -405,9 +382,7 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
 static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
 {
     void *ptr = NULL;
-#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
-    ptr = ioremap_cache_shared(phys, size);
-#elif defined(NV_IOREMAP_CACHE_PRESENT)
+#if defined(NV_IOREMAP_CACHE_PRESENT)
     ptr = ioremap_cache(phys, size);
 #else
     return nv_ioremap(phys, size);
@@ -421,9 +396,7 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
 static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
 {
     void *ptr = NULL;
-#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
-    ptr = ioremap_driver_hardened_wc(phys, size);
-#elif defined(NV_IOREMAP_WC_PRESENT)
+#if defined(NV_IOREMAP_WC_PRESENT)
    ptr = ioremap_wc(phys, size);
 #else
    return nv_ioremap_nocache(phys, size);
@@ -465,13 +438,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
         NV_MEMDBG_ADD(ptr, size); \
     }
 
-#if defined(__GFP_RETRY_MAYFAIL)
 #define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_RETRY_MAYFAIL)
-#elif defined(__GFP_NORETRY)
-#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_NORETRY)
-#else
-#define NV_GFP_NO_OOM (NV_GFP_KERNEL)
-#endif
 
 #define NV_KMALLOC_NO_OOM(ptr, size) \
     { \
@@ -528,22 +495,12 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
 #endif
 #endif
 
 #define NV_GET_CURRENT_PROCESS() current->tgid
-#define NV_IN_ATOMIC() in_atomic()
 #define NV_COPY_TO_USER(to, from, n) copy_to_user(to, from, n)
 #define NV_COPY_FROM_USER(to, from, n) copy_from_user(to, from, n)
 
 #define NV_IS_SUSER() capable(CAP_SYS_ADMIN)
-#define NV_CLI() local_irq_disable()
-#define NV_SAVE_FLAGS(eflags) local_save_flags(eflags)
-#define NV_RESTORE_FLAGS(eflags) local_irq_restore(eflags)
-#define NV_MAY_SLEEP() (!irqs_disabled() && !in_interrupt() && !NV_IN_ATOMIC())
+#define NV_MAY_SLEEP() (!irqs_disabled() && !in_interrupt() && !in_atomic())
 #define NV_MODULE_PARAMETER(x) module_param(x, int, 0)
 #define NV_MODULE_STRING_PARAMETER(x) module_param(x, charp, 0)
 #undef MODULE_PARM
 
 #define NV_NUM_CPUS() num_possible_cpus()
 
-#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
-
 #if defined(NVCPU_X86_64) && \
@@ -596,7 +553,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 #endif
 }
 
-#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
 #define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
 #define NV_VMA_PGOFF(vma) ((vma)->vm_pgoff)
 #define NV_VMA_SIZE(vma) ((vma)->vm_end - (vma)->vm_start)
@@ -693,9 +649,9 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 #define NV_PRINT_AT(nv_debug_level,at) \
     { \
         nv_printf(nv_debug_level, \
-            "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
+            "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %lld, " \
             "page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
-            at->num_pages, NV_ATOMIC_READ(at->usage_count), \
+            at->num_pages, (long long)atomic64_read(&at->usage_count), \
             at->page_table); \
     }
@@ -711,13 +667,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 # define minor(x) MINOR(x)
 #endif
 
-#if !defined(PCI_COMMAND_SERR)
-#define PCI_COMMAND_SERR 0x100
-#endif
-#if !defined(PCI_COMMAND_INTX_DISABLE)
-#define PCI_COMMAND_INTX_DISABLE 0x400
-#endif
-
 #ifndef PCI_CAP_ID_EXP
 #define PCI_CAP_ID_EXP 0x10
 #endif
@@ -970,7 +919,7 @@ struct nv_dma_buf
 typedef struct nv_alloc_s {
     struct nv_alloc_s *next;
     struct device     *dev;
-    atomic_t           usage_count;
+    atomic64_t         usage_count;
     struct {
         NvBool contig      : 1;
         NvBool guest       : 1;
@@ -983,6 +932,7 @@ typedef struct nv_alloc_s {
         NvBool unencrypted : 1;
         NvBool coherent    : 1;
         NvBool carveout    : 1;
+        NvBool pool        : 1;
     } flags;
     unsigned int cache_type;
     unsigned int num_pages;
@@ -1143,14 +1093,18 @@ typedef struct nv_dma_map_s {
          i++, sm = &dm->mapping.discontig.submaps[i])
 
 /*
- * On 4K ARM kernels, use max submap size a multiple of 64K to keep nv-p2p happy.
- * Despite 4K OS pages, we still use 64K P2P pages due to dependent modules still using 64K.
- * Instead of using (4G-4K), use max submap size as (4G-64K) since the mapped IOVA range
- * must be aligned at 64K boundary.
+ * On 4K ARM kernels, use max submap size a multiple of 2M to avoid breaking up 2M page size
+ * sysmem allocations.
+ *
+ * Instead of using (4G-4K), use max submap size as (4G-2M) since the mapped IOVA range
+ * must be aligned at 2M boundary.
+ *
+ * Bug 5401803: Tracks migrating away from making IOMMU mappings using submaps in favor of
+ * using sg_chain() to chain a single large scatterlist.
 */
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define NV_DMA_U32_MAX_4K_PAGES ((NvU32)((NV_U32_MAX >> PAGE_SHIFT) + 1))
-#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 16))
+#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 512))
 #else
 #define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
 #endif
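The updated comment's arithmetic checks out: with 4K pages, NV_DMA_U32_MAX_4K_PAGES is 0x100000 pages (exactly 4 GiB), and subtracting 512 pages removes 2 MiB, so each submap's IOVA range stays 2M-aligned. A compile-time sketch of that check (constants mirrored locally so it stands alone; assumes PAGE_SHIFT == 12):

    /* Sanity check of the (4G - 2M) submap arithmetic, assuming 4K pages. */
    #define PAGE_SHIFT_4K 12
    #define U32_MAX_4K_PAGES ((0xFFFFFFFFu >> PAGE_SHIFT_4K) + 1) /* 0x100000 pages = 4 GiB */

    _Static_assert(((unsigned long long)(U32_MAX_4K_PAGES - 512) << PAGE_SHIFT_4K)
                   == (4ULL << 30) - (2ULL << 20),
                   "max submap must be 4G - 2M so each submap stays 2M-aligned");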
@@ -1294,7 +1248,8 @@ struct nv_pci_tegra_devfreq_dev;
 typedef struct nv_linux_state_s {
     nv_state_t nv_state;
 
-    atomic_t usage_count;
+    atomic64_t usage_count;
 
     NvU32 suspend_count;
 
     struct device *dev;
@@ -1470,6 +1425,8 @@ typedef struct nv_linux_state_s {
 
     int (*devfreq_suspend)(struct device *dev);
     int (*devfreq_resume)(struct device *dev);
+    int (*devfreq_enable_boost)(struct device *dev, unsigned int duration);
+    int (*devfreq_disable_boost)(struct device *dev);
 #endif
 } nv_linux_state_t;
 
@@ -1640,6 +1597,7 @@ extern NvU32 NVreg_EnableUserNUMAManagement;
 extern NvU32 NVreg_RegisterPCIDriver;
 extern NvU32 NVreg_RegisterPlatformDeviceDriver;
 extern NvU32 NVreg_EnableResizableBar;
+extern NvU32 NVreg_TegraGpuPgMask;
 extern NvU32 NVreg_EnableNonblockingOpen;
 
 extern NvU32 num_probed_nv_devices;
@@ -1669,9 +1627,9 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
 {
     NV_PRINT_AT(NV_DBG_MEMINFO, at);
 
-    if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
+    if (atomic64_dec_and_test(&at->usage_count))
     {
-        NV_ATOMIC_INC(at->usage_count);
+        atomic64_inc(&at->usage_count);
 
         at->next = nvlfp->free_list;
         nvlfp->free_list = at;
@@ -1697,10 +1655,7 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
     return flags;
 }
 
-#define MODULE_BASE_NAME "nvidia"
-#define MODULE_INSTANCE_NUMBER 0
-#define MODULE_INSTANCE_STRING ""
-#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
+#define MODULE_NAME "nvidia"
 
 NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
 NV_STATUS nv_imp_icc_get(nv_state_t *nv);
---

@@ -26,45 +26,12 @@
 #include <linux/list.h>
 #include "conftest.h"
 
-/*
- * list_first_entry_or_null added by commit 6d7581e62f8b ("list: introduce
- * list_first_entry_or_null") in v3.10 (2013-05-29).
- */
-#if !defined(list_first_entry_or_null)
-#define list_first_entry_or_null(ptr, type, member) \
-    (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
-#endif
-
-/*
- * Added by commit 93be3c2eb337 ("list: introduce list_last_entry(), use
- * list_{first,last}_entry()") in v3.13 (2013-11-12).
- */
-#if !defined(list_last_entry)
-#define list_last_entry(ptr, type, member) \
-    list_entry((ptr)->prev, type, member)
-#endif
-
-/* list_last_entry_or_null() doesn't actually exist in the kernel */
-#if !defined(list_last_entry_or_null)
-#define list_last_entry_or_null(ptr, type, member) \
-    (!list_empty(ptr) ? list_last_entry(ptr, type, member) : NULL)
-#endif
-
-/*
- * list_prev_entry() and list_next_entry added by commit 008208c6b26f
- * ("list: introduce list_next_entry() and list_prev_entry()") in
- * v3.13 (2013-11-12).
- */
-#if !defined(list_prev_entry)
-#define list_prev_entry(pos, member) \
-    list_entry((pos)->member.prev, typeof(*(pos)), member)
-#endif
-
-#if !defined(list_next_entry)
-#define list_next_entry(pos, member) \
-    list_entry((pos)->member.next, typeof(*(pos)), member)
-#endif
-
 #if !defined(NV_LIST_IS_FIRST_PRESENT)
 static inline int list_is_first(const struct list_head *list,
                                 const struct list_head *head)
---

@@ -32,18 +32,6 @@
 #include <linux/semaphore.h>
 #include <linux/sched/signal.h> /* signal_pending */
 
-#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
-typedef raw_spinlock_t nv_spinlock_t;
-#define NV_DEFINE_SPINLOCK(lock)  DEFINE_RAW_SPINLOCK(lock)
-#define NV_SPIN_LOCK_INIT(lock)   raw_spin_lock_init(lock)
-#define NV_SPIN_LOCK_IRQ(lock)    raw_spin_lock_irq(lock)
-#define NV_SPIN_UNLOCK_IRQ(lock)  raw_spin_unlock_irq(lock)
-#define NV_SPIN_LOCK_IRQSAVE(lock,flags) raw_spin_lock_irqsave(lock,flags)
-#define NV_SPIN_UNLOCK_IRQRESTORE(lock,flags) raw_spin_unlock_irqrestore(lock,flags)
-#define NV_SPIN_LOCK(lock)        raw_spin_lock(lock)
-#define NV_SPIN_UNLOCK(lock)      raw_spin_unlock(lock)
-#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
-#else
 typedef spinlock_t nv_spinlock_t;
 #define NV_DEFINE_SPINLOCK(lock)  DEFINE_SPINLOCK(lock)
 #define NV_SPIN_LOCK_INIT(lock)   spin_lock_init(lock)
@@ -54,7 +42,6 @@ typedef spinlock_t nv_spinlock_t;
 #define NV_SPIN_LOCK(lock)        spin_lock(lock)
 #define NV_SPIN_UNLOCK(lock)      spin_unlock(lock)
-#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
 #endif
 
 #define NV_INIT_MUTEX(mutex)      sema_init(mutex, 1)
---

@@ -196,14 +196,33 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
  * Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write().
  */
 void nv_vma_start_write(struct vm_area_struct *);
 
+static inline void nv_vma_flags_set_word(struct vm_area_struct *vma, unsigned long flags)
+{
+    nv_vma_start_write(vma);
+#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT)
+    vma_flags_set_word(&vma->flags, flags);
+#else
+    ACCESS_PRIVATE(vma, __vm_flags) |= flags;
+#endif
+}
+
+static inline void nv_vma_flags_clear_word(struct vm_area_struct *vma, unsigned long flags)
+{
+    nv_vma_start_write(vma);
+#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT)
+    vma_flags_clear_word(&vma->flags, flags);
+#else
+    ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
+#endif
+}
 #endif // !NV_CAN_CALL_VMA_START_WRITE
 
 static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
 {
 #if !NV_CAN_CALL_VMA_START_WRITE
     nv_vma_start_write(vma);
     ACCESS_PRIVATE(vma, __vm_flags) |= flags;
 #elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
     nv_vma_flags_set_word(vma, flags);
 #elif defined(NV_VM_FLAGS_SET_PRESENT)
     vm_flags_set(vma, flags);
 #else
     vma->vm_flags |= flags;
@@ -213,9 +232,8 @@ static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
 static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
 {
 #if !NV_CAN_CALL_VMA_START_WRITE
     nv_vma_start_write(vma);
     ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
 #elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
     nv_vma_flags_clear_word(vma, flags);
 #elif defined(NV_VM_FLAGS_SET_PRESENT)
     vm_flags_clear(vma, flags);
 #else
     vma->vm_flags &= ~flags;
---

@@ -47,9 +47,6 @@ void NV_API_CALL nv_init_msi (nv_state_t *);
 void  NV_API_CALL nv_init_msix (nv_state_t *);
 NvS32 NV_API_CALL nv_request_msix_irq (nv_linux_state_t *);
 
-#define NV_PCI_MSIX_FLAGS 2
-#define NV_PCI_MSIX_FLAGS_QSIZE 0x7FF
-
 static inline void nv_free_msix_irq(nv_linux_state_t *nvl)
 {
     int i;
@@ -67,17 +64,8 @@ static inline int nv_get_max_irq(struct pci_dev *pci_dev)
     NvU16 ctrl;
 
     cap_ptr = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
-    /*
-     * The 'PCI_MSIX_FLAGS' was added in 2.6.21-rc3 by:
-     * 2007-03-05 f5f2b13129a6541debf8851bae843cbbf48298b7
-     */
-#if defined(PCI_MSIX_FLAGS)
     pci_read_config_word(pci_dev, cap_ptr + PCI_MSIX_FLAGS, &ctrl);
     nvec = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
-#else
-    pci_read_config_word(pci_dev, cap_ptr + NV_PCI_MSIX_FLAGS, &ctrl);
-    nvec = (ctrl & NV_PCI_MSIX_FLAGS_QSIZE) + 1;
-#endif
 
     return nvec;
 }
---

@@ -73,31 +73,22 @@ extern NvBool nvos_is_chipset_io_coherent(void);
 
 #define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
 #if defined(NVCPU_AARCH64)
-#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
-                                       PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
-    __pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
 #define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)
 #define NV_PGPROT_READ_ONLY(old_prot) \
     __pgprot_modify(old_prot, 0, PTE_RDONLY)
 #elif defined(NVCPU_X86_64)
 #define NV_PGPROT_UNCACHED_WEAK(old_prot) pgprot_noncached_weak(old_prot)
-#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
-    pgprot_modify_writecombine(old_prot)
 #define NV_PGPROT_WRITE_COMBINED(old_prot) \
-    NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
+    pgprot_modify_writecombine(old_prot)
 #define NV_PGPROT_READ_ONLY(old_prot) \
     __pgprot(pgprot_val((old_prot)) & ~_PAGE_RW)
 #elif defined(NVCPU_RISCV64)
-#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
+#define NV_PGPROT_WRITE_COMBINED(old_prot) \
     pgprot_writecombine(old_prot)
-/* Don't attempt to mark sysmem pages as write combined on riscv */
-#define NV_PGPROT_WRITE_COMBINED(old_prot) old_prot
 #define NV_PGPROT_READ_ONLY(old_prot) \
     __pgprot(pgprot_val((old_prot)) & ~_PAGE_WRITE)
 #else
 /* Writecombine is not supported */
-#undef NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
 #undef NV_PGPROT_WRITE_COMBINED(old_prot)
 #define NV_PGPROT_READ_ONLY(old_prot)
 #endif
---

@@ -57,8 +57,6 @@ int nv_uvm_init (void);
 void nv_uvm_exit (void);
 NV_STATUS nv_uvm_suspend (void);
 NV_STATUS nv_uvm_resume (void);
-void nv_uvm_notify_start_device (const NvU8 *uuid);
-void nv_uvm_notify_stop_device (const NvU8 *uuid);
 NV_STATUS nv_uvm_event_interrupt (const NvU8 *uuid);
 NV_STATUS nv_uvm_drain_P2P (const NvU8 *uuid);
 NV_STATUS nv_uvm_resume_P2P (const NvU8 *uuid);
---

@@ -36,6 +36,19 @@
 #define NV_MAX_ISR_DELAY_MS (NV_MAX_ISR_DELAY_US / 1000)
 #define NV_NSECS_TO_JIFFIES(nsec) ((nsec) * HZ / 1000000000)
 
+/*
+ * in_hardirq() was added in v5.11-rc1 (2020-12-15) to replace in_irq().
+ * Fall back to in_irq() for older kernels that don't have in_hardirq().
+ */
+static inline NvBool nv_in_hardirq(void)
+{
+#if defined(in_hardirq)
+    return in_hardirq();
+#else
+    return in_irq();
+#endif
+}
 
 #if !defined(NV_KTIME_GET_RAW_TS64_PRESENT)
 static inline void ktime_get_raw_ts64(struct timespec64 *ts64)
 {
@@ -82,7 +95,7 @@ static inline NV_STATUS nv_sleep_us(unsigned int us)
     ktime_get_raw_ts64(&tm1);
 #endif
 
-    if (in_irq() && (us > NV_MAX_ISR_DELAY_US))
+    if (nv_in_hardirq() && (us > NV_MAX_ISR_DELAY_US))
         return NV_ERR_GENERIC;
 
     mdelay_safe_msec = us / 1000;
@@ -127,7 +140,7 @@ static inline NV_STATUS nv_sleep_ms(unsigned int ms)
     tm_start = tm_aux;
 #endif
 
-    if (in_irq() && (ms > NV_MAX_ISR_DELAY_MS))
+    if (nv_in_hardirq() && (ms > NV_MAX_ISR_DELAY_MS))
     {
         return NV_ERR_GENERIC;
     }
---

@@ -86,6 +86,8 @@ extern const NvBool nv_is_rm_firmware_supported_os;
 
 #define NV_RM_DEVICE_INTR_ADDRESS 0x100
 
+#define NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT 0xFFFFFFFF
+
 /*
  * Clock domain identifier, which is used for fetching the engine
  * load backed by the specified clock domain for Tegra platforms
@@ -413,6 +415,7 @@ typedef struct nv_soc_irq_info_s {
 
 #define NV_MAX_SOC_IRQS 10
 #define NV_MAX_DPAUX_NUM_DEVICES 4
+#define NV_MAX_DPAUX_DEV_NAME_SIZE 10
 
 #define NV_MAX_SOC_DPAUX_NUM_DEVICES 4
 
@@ -429,6 +432,12 @@ typedef struct nv_phys_addr_range
     NvU64 len;
 } nv_phys_addr_range_t;
 
+typedef struct
+{
+    char vbios_version[15];
+    char firmware_version[64];
+} nv_cached_gpu_info_t;
+
 typedef struct nv_state_t
 {
     void *priv; /* private data */
@@ -465,6 +474,7 @@ typedef struct nv_state_t
     NvU32 num_dpaux_instance;
     NvU32 interrupt_line;
     NvU32 dpaux_irqs[NV_MAX_DPAUX_NUM_DEVICES];
+    char dpaux_devname[NV_MAX_DPAUX_NUM_DEVICES][NV_MAX_DPAUX_DEV_NAME_SIZE];
     nv_soc_irq_info_t soc_irq_info[NV_MAX_SOC_IRQS];
     NvS32 current_soc_irq;
     NvU32 num_soc_irqs;
@@ -481,6 +491,7 @@ typedef struct nv_state_t
     NvBool is_tegra_pci_igpu;
     NvBool supports_tegra_igpu_rg;
     NvBool is_tegra_pci_igpu_rg_enabled;
+    NvU32 tegra_pci_igpu_pg_mask;
 
     NvBool primary_vga;
 
@@ -588,8 +599,12 @@ typedef struct nv_state_t
     /* Console is managed by drm drivers or NVKMS */
     NvBool client_managed_console;
 
-    /* Bool to check if power management is unsupported */
+    /* Struct to cache the gpu info details */
+    nv_cached_gpu_info_t cached_gpu_info;
+
+    /* Bool to check if power management is supported */
     NvBool is_pm_unsupported;
 
 } nv_state_t;
 
 #define NVFP_TYPE_NONE 0x0
@@ -651,7 +666,7 @@ typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
 typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
 typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
 typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
+typedef struct UvmGpuAccessBitsBufferAlloc_tag *nvgpuAccessBitBufferAlloc_t;
 /*
  * flags
 */
@@ -988,6 +1003,7 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
 
 void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
 void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
+void NV_API_CALL nv_set_gpu_pg_mask(nv_state_t *);
 
 struct dma_buf;
 typedef struct nv_dma_buf nv_dma_buf_t;
@@ -1107,16 +1123,15 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *, ...)
 void        NV_API_CALL rm_isr_bh              (nvidia_stack_t *, nv_state_t *);
 void        NV_API_CALL rm_isr_bh_unlocked     (nvidia_stack_t *, nv_state_t *);
 NvBool      NV_API_CALL rm_is_msix_allowed     (nvidia_stack_t *, nv_state_t *);
 NvBool      NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId, NvU16 subsystemId);
+NV_STATUS   NV_API_CALL rm_pmu_perfmon_get_load (nvidia_stack_t *, nv_state_t *, NvU32 *, TEGRASOC_DEVFREQ_CLK);
 NV_STATUS   NV_API_CALL rm_power_management    (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
 NV_STATUS   NV_API_CALL rm_stop_user_channels  (nvidia_stack_t *, nv_state_t *);
 NV_STATUS   NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
 NV_STATUS   NV_API_CALL rm_save_low_res_mode   (nvidia_stack_t *, nv_state_t *);
-void        NV_API_CALL rm_get_vbios_version   (nvidia_stack_t *, nv_state_t *, char *);
 char*       NV_API_CALL rm_get_gpu_uuid        (nvidia_stack_t *, nv_state_t *);
 const NvU8* NV_API_CALL rm_get_gpu_uuid_raw    (nvidia_stack_t *, nv_state_t *);
 void        NV_API_CALL rm_set_rm_firmware_requested (nvidia_stack_t *, nv_state_t *);
-void        NV_API_CALL rm_get_firmware_version (nvidia_stack_t *, nv_state_t *, char *, NvLength);
 void        NV_API_CALL rm_cleanup_file_private (nvidia_stack_t *, nv_state_t *, nv_file_private_t *);
 void        NV_API_CALL rm_unbind_lock         (nvidia_stack_t *, nv_state_t *);
 NV_STATUS   NV_API_CALL rm_read_registry_dword (nvidia_stack_t *, nv_state_t *, const char *, NvU32 *);

---

@@ -33,6 +33,12 @@ typedef NvU32 MIGDeviceId;
 
 #define NO_MIG_DEVICE 0L
 
+/* Convert a MIGDeviceId into a 0-based per-GPU subdevice index. */
+#define MIG_DEVICE_ID_SUBDEV_MASK 0xf0000000
+#define MIG_DEVICE_ID_SUBDEV_SHIFT 28
+
+#define MIG_DEVICE_ID_TO_SUBDEV(migDeviceId) (((migDeviceId) & MIG_DEVICE_ID_SUBDEV_MASK) >> MIG_DEVICE_ID_SUBDEV_SHIFT)
+
 #ifdef __cplusplus
 }
 #endif
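MIG_DEVICE_ID_TO_SUBDEV() simply pulls the subdevice index out of the top nibble of the ID. A self-contained worked example (the ID value is made up for illustration):

    #include <stdio.h>

    /* Illustrative only: the new macros are mirrored here so the example
     * stands alone. */
    #define MIG_DEVICE_ID_SUBDEV_MASK  0xf0000000
    #define MIG_DEVICE_ID_SUBDEV_SHIFT 28
    #define MIG_DEVICE_ID_TO_SUBDEV(id) \
        (((id) & MIG_DEVICE_ID_SUBDEV_MASK) >> MIG_DEVICE_ID_SUBDEV_SHIFT)

    int main(void)
    {
        unsigned int migDeviceId = 0x30000007u; /* hypothetical encoded ID */
        /* Top nibble 0x3 yields subdevice index 3; the low bits are untouched. */
        printf("subdev = %u\n", MIG_DEVICE_ID_TO_SUBDEV(migDeviceId)); /* prints 3 */
        return 0;
    }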
---

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2013-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,7 +29,7 @@
 #define _NV_UVM_INTERFACE_H_
 
 // Forward references, to break circular header file dependencies:
-struct UvmOpsUvmEvents;
+struct UvmEventsLinux;
 
 #if defined(NVIDIA_UVM_ENABLED)
 
@@ -1008,6 +1008,65 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
 */
 NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
                                              NvBool bEnable);
+
+/*******************************************************************************
+    nvUvmInterfaceAccessBitsBufAlloc
+
+    This function allocates a buffer for access bits.
+
+    Arguments:
+        device[IN]           - Device handle associated with the gpu
+        pAccessBitsInfo[OUT] - Information provided by RM for access bits handling
+
+    Error codes:
+        NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
+        NV_ERR_NO_MEMORY        - If the memory allocation fails.
+        NV_ERR_GENERIC          - Unexpected error. We try hard to
+                                  avoid returning this error code,
+                                  because it is not very informative.
+*/
+NV_STATUS nvUvmInterfaceAccessBitsBufAlloc(uvmGpuDeviceHandle device,
+                                           UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo);
+
+/*******************************************************************************
+    nvUvmInterfaceAccessBitsBufFree
+
+    This function frees the buffer used for access bits.
+
+    Arguments:
+        device[IN]          - Device handle associated with the gpu
+        pAccessBitsInfo[IN] - Information containing the access bits buffer handle to be freed
+
+    Error codes:
+        NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
+        NV_ERR_GENERIC          - Unexpected error. We try hard to
+                                  avoid returning this error code,
+                                  because it is not very informative.
+*/
+NV_STATUS nvUvmInterfaceAccessBitsBufFree(uvmGpuDeviceHandle device,
+                                          UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo);
+
+/*******************************************************************************
+    nvUvmInterfaceAccessBitsDump
+
+    This function gets the access bits information in accordance with the mode
+    requested and stores it in the buffer provided by the client.
+
+    Arguments:
+        device[IN]              - Device handle associated with the gpu
+        pAccessBitsInfo[IN/OUT] - Information containing the access bits buffer
+                                  handle to be used for dumping the access bits
+                                  and the buffer where the dumped data will be stored
+        mode[IN]                - Mode in which the access bits are dumped
+
+    Error codes:
+        NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
+        NV_ERR_GENERIC          - Unexpected error. We try hard to
+                                  avoid returning this error code,
+                                  because it is not very informative.
+*/
+NV_STATUS nvUvmInterfaceAccessBitsDump(uvmGpuDeviceHandle device,
+                                       UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo,
+                                       UVM_ACCESS_BITS_DUMP_MODE mode);
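Taken together, the three new entry points imply an allocate/dump/free lifecycle. A minimal usage sketch under that reading (the wrapper function is hypothetical; "device" is assumed to be a valid handle obtained elsewhere):

    /* Hypothetical caller: allocate an access-bits buffer, dump the current
     * bits into it, then free it. Error handling is abbreviated. */
    static NV_STATUS dump_access_bits_once(uvmGpuDeviceHandle device)
    {
        UvmGpuAccessBitsBufferAlloc info;
        NV_STATUS status;

        status = nvUvmInterfaceAccessBitsBufAlloc(device, &info);
        if (status != NV_OK)
            return status;

        status = nvUvmInterfaceAccessBitsDump(device, &info,
                                              UVM_ACCESS_BITS_DUMP_MODE_CURRENT);

        /* Free the buffer regardless of whether the dump succeeded. */
        (void)nvUvmInterfaceAccessBitsBufFree(device, &info);
        return status;
    }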
/*******************************************************************************
    nvUvmInterfaceInitAccessCntrInfo

@@ -1086,20 +1145,20 @@ NV_STATUS nvUvmInterfaceDisableAccessCntr(uvmGpuDeviceHandle device,
                                           UvmGpuAccessCntrInfo *pAccessCntrInfo);
 
 //
-// Called by the UVM driver to register operations with RM. Only one set of
+// Called by the UVM driver to register event callbacks with RM. Only one set of
 // callbacks can be registered by any driver at a time. If another set of
 // callbacks was already registered, NV_ERR_IN_USE is returned.
 //
-NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvmOps);
+NV_STATUS nvUvmInterfaceRegisterUvmEvents(struct UvmEventsLinux *importedEvents);
 
 //
-// Counterpart to nvUvmInterfaceRegisterUvmCallbacks. This must only be called
-// if nvUvmInterfaceRegisterUvmCallbacks returned NV_OK.
+// Counterpart to nvUvmInterfaceRegisterUvmEvents. This must only be called if
+// nvUvmInterfaceRegisterUvmEvents returned NV_OK.
 //
 // Upon return, the caller is guaranteed that any outstanding callbacks are done
 // and no new ones will be invoked.
 //
-void nvUvmInterfaceDeRegisterUvmOps(void);
+void nvUvmInterfaceDeRegisterUvmEvents(void);
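Under the renamed API, a Linux client now hands RM a struct UvmEventsLinux rather than UvmOpsUvmEvents. A sketch of the registration flow, with all callback implementations assumed to exist elsewhere in the client (their signatures follow the typedefs shown later in this diff; suspend/resume signatures are assumed):

    /* Hypothetical client-side registration using the renamed entry points. */
    extern NV_STATUS my_isr_top_half(const NvProcessorUuid *pGpuUuidStruct);
    extern NV_STATUS my_suspend(void);
    extern NV_STATUS my_resume(void);
    extern NV_STATUS my_drain_p2p(const NvProcessorUuid *pGpuUuidStruct);
    extern NV_STATUS my_resume_p2p(const NvProcessorUuid *pGpuUuidStruct);

    static NV_STATUS my_register_uvm_events(void)
    {
        static struct UvmEventsLinux events = {
            .isrTopHalf = my_isr_top_half,
            .suspend    = my_suspend,
            .resume     = my_resume,
            .drainP2P   = my_drain_p2p,
            .resumeP2P  = my_resume_p2p,
        };

        /* NV_ERR_IN_USE means another driver already registered callbacks. */
        return nvUvmInterfaceRegisterUvmEvents(&events);
    }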
/*******************************************************************************
    nvUvmInterfaceGetNvlinkInfo

---

@@ -221,9 +221,11 @@ typedef struct UvmGpuChannelInstanceInfo_tag
     // Ampere+ GPUs
     volatile NvU32 *pChramChannelRegister;
 
     // Out: Address of the Runlist PRI Base Register required to ring the
     // doorbell after clearing the faulted bit.
     volatile NvU32 *pRunlistPRIBaseRegister;
+    // Out: Address of the doorbell.
+    volatile NvU32 *workSubmissionOffset;
 
     // Out: channel handle required to ring the doorbell.
     NvU32 workSubmissionToken;
@@ -365,6 +367,9 @@ typedef struct
     // True if the CE supports encryption
     NvBool secure:1;
 
+    // True if the CE can be used for fast scrub
+    NvBool scrub:1;
+
     // Mask of physical CEs assigned to this LCE
     //
     // The value returned by RM for this field may change when a GPU is
@@ -383,6 +388,7 @@ typedef enum
 {
     UVM_LINK_TYPE_NONE,
     UVM_LINK_TYPE_PCIE,
+    UVM_LINK_TYPE_PCIE_BAR1,
     UVM_LINK_TYPE_NVLINK_1,
     UVM_LINK_TYPE_NVLINK_2,
     UVM_LINK_TYPE_NVLINK_3,
@@ -539,6 +545,12 @@ typedef struct UvmGpuP2PCapsParams_tag
     // Size is 0 if bar1 p2p is not supported.
     NvU64 bar1DmaAddress[2];
     NvU64 bar1DmaSize[2];
 
+    // True if GPU i can use PCIe atomics on locations in GPU[i-1]
+    // BAR1. This implies that GPU[i] can issue PCIe atomics,
+    // GPU[i-1] can accept PCIe atomics, and the bus interconnect
+    // between the two GPUs can correctly route PCIe atomics.
+    NvBool bar1PcieAtomics[2];
 } UvmGpuP2PCapsParams;
 
 // Platform-wide information
@@ -830,11 +842,7 @@ typedef NV_STATUS (*uvmEventServiceInterrupt_t) (void *pDeviceObject,
     NV_OK if the UVM driver handled the interrupt
     NV_ERR_NO_INTR_PENDING if the interrupt is not for the UVM driver
 */
-#if defined (__linux__)
 typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct);
-#else
-typedef void (*uvmEventIsrTopHalf_t) (void);
-#endif
 
 /*******************************************************************************
     uvmEventDrainP2P
@@ -871,20 +879,24 @@ typedef NV_STATUS (*uvmEventDrainP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
 */
 typedef NV_STATUS (*uvmEventResumeP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
 
-struct UvmOpsUvmEvents
+struct UvmEventsLinux
 {
     uvmEventIsrTopHalf_t isrTopHalf;
     uvmEventSuspend_t suspend;
    uvmEventResume_t resume;
    uvmEventDrainP2P_t drainP2P;
    uvmEventResumeP2P_t resumeP2P;
 };
 
+struct UvmEventsWindows
 {
     uvmEventSuspend_t suspend;
     uvmEventResume_t resume;
     uvmEventStartDevice_t startDevice;
     uvmEventStopDevice_t stopDevice;
     uvmEventIsrTopHalf_t isrTopHalf;
 #if defined (_WIN32)
     uvmEventWddmResetDuringTimeout_t wddmResetDuringTimeout;
     uvmEventWddmRestartAfterTimeout_t wddmRestartAfterTimeout;
     uvmEventServiceInterrupt_t serviceInterrupt;
 #endif
     uvmEventDrainP2P_t drainP2P;
     uvmEventResumeP2P_t resumeP2P;
 };
 
 #define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
@@ -1043,6 +1055,22 @@ typedef struct UvmGpuAccessCntrConfig_tag
     NvU32 threshold;
 } UvmGpuAccessCntrConfig;
 
+typedef enum
+{
+    UVM_ACCESS_BITS_DUMP_MODE_AGGREGATE = 0,
+    UVM_ACCESS_BITS_DUMP_MODE_DIFF = 1,
+    UVM_ACCESS_BITS_DUMP_MODE_CURRENT = 2,
+} UVM_ACCESS_BITS_DUMP_MODE;
+
+typedef struct UvmGpuAccessBitsBufferAlloc_tag
+{
+    NvHandle accessBitsBufferHandle;
+    NvBool bDirtyBits;
+    NvU32 granularity;
+    NV_DECLARE_ALIGNED(NvU64 enabledMask[64], 8);
+    NV_DECLARE_ALIGNED(NvU64 currentBits[64], 8);
+} UvmGpuAccessBitsBufferAlloc;
+
 //
 // When modifying this enum, make sure they are compatible with the mirrored
 // MEMORY_PROTECTION enum in phys_mem_allocator.h.
@@ -1080,6 +1108,7 @@ typedef UvmGpuPagingChannel *gpuPagingChannelHandle;
 typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
 typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
 typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
+typedef UvmGpuAccessBitsBufferAlloc gpuAccessBitsBufferAlloc;
 
 typedef struct UvmCslIv
 {

---

@@ -142,6 +142,8 @@ typedef enum {
     UvmGpuCompressionTypeCount = 2
 } UvmGpuCompressionType;
 
+#define UVM_PMA_MAX_LOCALIZED_REGION_COUNT 2
+
 //
 // Mirrored in PMA (PMA_STATS)
 //
@@ -153,6 +155,9 @@ typedef struct UvmPmaStatistics_tag
     volatile NvU64 numPages2mProtected;      // PMA-wide 2MB pages count in protected memory
     volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
     volatile NvU64 numFreePages2mProtected;  // PMA-wide free 2MB pages count in protected memory
+    volatile NvU64 numPages2mLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT];      // Localizable 2MB per-uGPU frame count
+    volatile NvU64 numFreePages64kLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable free 64KB per-uGPU frame count
+    volatile NvU64 numFreePages2mLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT];  // Localizable free 2MB per-uGPU frame count
 } UvmPmaStatistics;
 
 typedef enum
---

@@ -174,10 +174,7 @@ struct NvKmsKapiDeviceResourcesInfo {
 
         NvBool supportsSyncpts;
 
-        NvBool requiresVrrSemaphores;
-
         NvBool supportsInputColorRange;
         NvBool supportsInputColorSpace;
         NvBool contiguousPhysicalMappings;
     } caps;
 
     NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
@@ -451,7 +448,6 @@ struct NvKmsKapiHeadReplyConfig {
 struct NvKmsKapiModeSetReplyConfig {
     enum NvKmsFlipResult flipResult;
     NvBool vrrFlip;
-    NvS32 vrrSemaphoreIndex;
     struct NvKmsKapiHeadReplyConfig
         headReplyConfig[NVKMS_KAPI_MAX_HEADS];
 };
@@ -1550,22 +1546,6 @@ struct NvKmsKapiFunctionsTable {
         NvU32 semaphoreIndex
     );
 
-    /*!
-     * Signal the VRR semaphore at the specified index from the CPU.
-     * If device does not support VRR semaphores, this is a no-op.
-     * Returns true if signal is success or no-op, otherwise returns false.
-     *
-     * \param [in] device  A device allocated using allocateDevice().
-     *
-     * \param [in] index   The VRR semaphore index to be signalled.
-     */
-    NvBool
-    (*signalVrrSemaphore)
-    (
-        struct NvKmsKapiDevice *device,
-        NvS32 index
-    );
-
     /*!
      * Check or wait on a head's LUT notifier.
     *
---

@@ -213,6 +213,7 @@ int NV_API_CALL os_nv_cap_validate_and_dup_fd (const nv_cap_t *, int)
 void  NV_API_CALL os_nv_cap_close_fd (int);
 NvS32 NV_API_CALL os_imex_channel_get (NvU64);
 NvS32 NV_API_CALL os_imex_channel_count (void);
+NV_STATUS NV_API_CALL os_tegra_igpu_perf_boost (void *, NvBool, NvU32);
 
 NV_STATUS NV_API_CALL os_get_tegra_platform (NvU32 *);
 enum os_pci_req_atomics_type {
---

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -83,6 +83,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvg
 NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
 NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
 NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
+NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_buffer_alloc(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t);
+NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_buffer_free(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t);
+NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_dump(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t, UVM_ACCESS_BITS_DUMP_MODE);
 NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
 NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
 NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
@@ -669,50 +669,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioremap_driver_hardened)
|
||||
#
|
||||
# Determine if the ioremap_driver_hardened() function is present.
|
||||
# It does not exist on all architectures.
|
||||
# TODO: Update the commit ID once the API is upstreamed.
|
||||
#
|
||||
CODE="
|
||||
#include <asm/io.h>
|
||||
void conftest_ioremap_driver_hardened(void) {
|
||||
ioremap_driver_hardened();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioremap_driver_hardened_wc)
|
||||
#
|
||||
# Determine if the ioremap_driver_hardened_wc() function is present.
|
||||
# It does not exist on all architectures.
|
||||
# TODO: Update the commit ID once the API is upstreamed.
|
||||
#
|
||||
CODE="
|
||||
#include <asm/io.h>
|
||||
void conftest_ioremap_driver_hardened_wc(void) {
|
||||
ioremap_driver_hardened_wc();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioremap_cache_shared)
|
||||
#
|
||||
# Determine if the ioremap_cache_shared() function is present.
|
||||
# It does not exist on all architectures.
|
||||
# TODO: Update the commit ID once the API is upstreamed.
|
||||
#
|
||||
CODE="
|
||||
#include <asm/io.h>
|
||||
void conftest_ioremap_cache_shared(void) {
|
||||
ioremap_cache_shared();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_CACHE_SHARED_PRESENT" "" "functions"
|
||||
;;
|
||||
dom0_kernel_present)
|
||||
# Add config parameter if running on DOM0.
|
||||
if [ -n "$VGX_BUILD" ]; then
|
||||
@@ -1330,33 +1286,54 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_available)
|
||||
# Determine if the DRM subsystem is usable
|
||||
get_dev_pagemap_has_pgmap_arg)
|
||||
#
|
||||
# Determine if the get_dev_pagemap() function has an additional
|
||||
# 'pgmap' argument.
|
||||
#
|
||||
# This argument was removed by commit dd57f5feb19a
|
||||
# (mm/memremap: remove unused get_dev_pagemap() parameter)
|
||||
# in linux-next, expected in v6.18.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#include <linux/memremap.h>
|
||||
|
||||
#include <drm/drm_drv.h>
|
||||
struct dev_pagemap *get_dev_pagemap_has_pgmap_arg(void) {
|
||||
struct dev_pagemap *pgmap;
|
||||
|
||||
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
|
||||
#error DRM not enabled
|
||||
#endif
|
||||
|
||||
void conftest_drm_available(void) {
|
||||
struct drm_driver drv;
|
||||
|
||||
/* 2013-10-02 1bb72532ac260a2d3982b40bdd4c936d779d0d16 */
|
||||
(void)drm_dev_alloc;
|
||||
|
||||
/* 2013-10-02 c22f0ace1926da399d9a16dfaf09174c1b03594c */
|
||||
(void)drm_dev_register;
|
||||
|
||||
/* 2013-10-02 c3a49737ef7db0bdd4fcf6cf0b7140a883e32b2a */
|
||||
(void)drm_dev_unregister;
|
||||
get_dev_pagemap(0, pgmap);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_AVAILABLE" "" "generic"
|
||||
compile_check_conftest "$CODE" "NV_GET_DEV_PAGEMAP_HAS_PGMAP_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_sysfs_connector_property_event)
|
||||
#
|
||||
# Determine if drm_sysfs_connector_property_event() is present.
|
||||
#
|
||||
# Commit 0cf8d292ba5e ("drm/sysfs: rename drm_sysfs_connector_status_event()")
|
||||
# renamed drm_sysfs_connector_status_event() to
|
||||
# drm_sysfs_connector_property_event() in Linux v6.5.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_sysfs.h>
|
||||
void conftest_drm_sysfs_connector_property_event(void) {
|
||||
drm_sysfs_connector_property_event();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_DRM_SYSFS_CONNECTOR_PROPERTY_EVENT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_sysfs_connector_status_event)
|
||||
#
|
||||
# Determine if drm_sysfs_connector_status_event() is present.
|
||||
#
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_sysfs.h>
|
||||
void conftest_drm_sysfs_connector_status_event(void) {
|
||||
drm_sysfs_connector_status_event();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_DRM_SYSFS_CONNECTOR_STATUS_EVENT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pde_data)
|
||||
@@ -1437,71 +1414,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_VMF_INSERT_PFN_PROT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_atomic_available)
|
||||
#
|
||||
# Determine if the DRM atomic modesetting subsystem is usable
|
||||
#
|
||||
# Added by commit 036ef5733ba4
|
||||
# ("drm/atomic: Allow drivers to subclass drm_atomic_state, v3") in
|
||||
# v4.2 (2018-05-18).
|
||||
#
|
||||
# Make conftest more robust by adding test for
|
||||
# drm_atomic_set_mode_prop_for_crtc(), this function added by
|
||||
# commit 955f3c334f0f ("drm/atomic: Add MODE_ID property") in v4.2
|
||||
# (2015-05-25). If the DRM atomic modesetting subsystem is
|
||||
# back ported to Linux kernel older than v4.2, then commit
|
||||
# 955f3c334f0f must be back ported in order to get NVIDIA-DRM KMS
|
||||
# support.
|
||||
# Commit 72fdb40c1a4b ("drm: extract drm_atomic_uapi.c") in v4.20
|
||||
# (2018-09-05), moved drm_atomic_set_mode_prop_for_crtc() function
|
||||
# prototype from drm/drm_atomic.h to drm/drm_atomic_uapi.h.
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#include <drm/drm_atomic.h>
|
||||
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
|
||||
#error DRM not enabled
|
||||
#endif
|
||||
void conftest_drm_atomic_modeset_available(void) {
|
||||
size_t a;
|
||||
|
||||
a = offsetof(struct drm_mode_config_funcs, atomic_state_alloc);
|
||||
}" > conftest$$.c;
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#include <drm/drm_atomic.h>
|
||||
#if defined(NV_DRM_DRM_ATOMIC_UAPI_H_PRESENT)
|
||||
#include <drm/drm_atomic_uapi.h>
|
||||
#endif
|
||||
void conftest_drm_atomic_set_mode_prop_for_crtc(void) {
|
||||
drm_atomic_set_mode_prop_for_crtc();
|
||||
}" > conftest$$.c;
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#undef NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
|
||||
else
|
||||
echo "#define NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
|
||||
fi
|
||||
else
|
||||
echo "#undef NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
|
||||
fi
|
||||
;;
|
||||
|
||||
drm_driver_has_legacy_dev_list)
|
||||
#
|
||||
# Determine if the 'drm_driver' structure has a 'legacy_dev_list' field.
|
||||
@@ -2202,6 +2114,35 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_GET_BACKLIGHT_DEVICE_BY_NAME_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
dma_map_ops_has_map_phys)
|
||||
#
|
||||
# Determine if .map_phys exists in struct dma_map_ops.
|
||||
#
|
||||
# Commit 14cb413af00c ("dma-mapping: remove unused mapping resource callbacks")
|
||||
# removed .map_resource operation and replaced it with .map_phys.
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/dma-map-ops.h>
|
||||
int conftest_dma_map_ops_has_map_phys(void) {
|
||||
return offsetof(struct dma_map_ops, map_phys);
|
||||
}
|
||||
int conftest_dma_map_ops_has_unmap_phys(void) {
|
||||
return offsetof(struct dma_map_ops, unmap_phys);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types"
|
||||
rm -f conftest$$.o
|
||||
return
|
||||
else
|
||||
echo "#undef NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types"
|
||||
return
|
||||
fi
|
||||
;;
|
||||
|
||||
dma_buf_ops_has_map)
|
||||
#
|
||||
# Determine if .map exists in dma_buf_ops.
|
||||
@@ -2330,6 +2271,7 @@ compile_test() {
|
||||
# drm_helper_mode_fill_fb_struct()") in linux-next
|
||||
# (2025-07-16)
|
||||
CODE="
|
||||
#include <linux/stddef.h>
|
||||
#include <drm/drm_modeset_helper.h>
|
||||
|
||||
void conftest_drm_fill_fb_struct_takes_format_info(void) {
|
||||
@@ -2444,6 +2386,23 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PM_RUNTIME_AVAILABLE" "" "generic"
|
||||
;;
|
||||
|
||||
pm_domain_available)
|
||||
#
|
||||
# Determine whether dev_pm_genpd_suspend() exists.
|
||||
#
|
||||
# This was added to the kernel in commit fc51989062138
|
||||
# ("PM: domains: Rename pm_genpd_syscore_poweroff|poweron()")
|
||||
# in v5.11-rc1 (2020-11-10),
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pm_domain.h>
|
||||
void pm_domain_conftest(void) {
|
||||
dev_pm_genpd_suspend();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PM_DOMAIN_AVAILABLE" "" "functions"
|
||||
;;
|
||||
|
||||
dma_direct_map_resource)
|
||||
#
|
||||
# Determine whether dma_is_direct() exists.
|
||||
@@ -2617,31 +2576,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS" "" "types"
|
||||
;;
|
||||
|
||||
drm_format_num_planes)
|
||||
#
|
||||
# Determine if drm_format_num_planes() function is present.
|
||||
#
|
||||
# The drm_format_num_planes() function was added by commit
|
||||
# d0d110e09629 drm: Add drm_format_num_planes() utility function in
|
||||
# v3.3 (2011-12-20). Prototype was moved from drm_crtc.h to
|
||||
# drm_fourcc.h by commit ae4df11a0f53 (drm: Move format-related
|
||||
# helpers to drm_fourcc.c) in v4.8 (2016-06-09).
|
||||
# drm_format_num_planes() has been removed by commit 05c452c115bf
|
||||
# (drm: Remove users of drm_format_num_planes) removed v5.3
|
||||
# (2019-05-16).
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_crtc.h>
|
||||
#include <drm/drm_fourcc.h>
|
||||
|
||||
void conftest_drm_format_num_planes(void) {
|
||||
drm_format_num_planes();
|
||||
}
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FORMAT_NUM_PLANES_PRESENT" "" "functions"
|
||||
;;

drm_gem_object_has_resv)
#
# Determine if the 'drm_gem_object' structure has a 'resv' field.
@@ -3712,6 +3646,90 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DEVM_CLK_BULK_GET_ALL_PRESENT" "" "functions"
;;

thermal_zone_for_each_trip)
#
# Determine if the thermal_zone_for_each_trip() function is present.
#
# Added by commit a56cc0a83385 ("thermal: core: Add function to
# walk trips under zone lock") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_zone_for_each_trip(void)
{
thermal_zone_for_each_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_ZONE_FOR_EACH_TRIP_PRESENT" "" "functions"
;;

thermal_bind_cdev_to_trip)
#
# Determine if the thermal_bind_cdev_to_trip() function is present.
#
# Added by commit d069ed6b752f ("thermal: core: Allow trip
# pointers to be used for cooling device binding") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_bind_cdev_to_trip(void)
{
thermal_bind_cdev_to_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_BIND_CDEV_TO_TRIP_PRESENT" "" "functions"
;;

thermal_unbind_cdev_from_trip)
#
# Determine if the thermal_unbind_cdev_from_trip() function is present.
#
# Added by commit d069ed6b752f ("thermal: core: Allow trip
# pointers to be used for cooling device binding") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_unbind_cdev_from_trip(void)
{
thermal_unbind_cdev_from_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_UNBIND_CDEV_FROM_TRIP_PRESENT" "" "functions"
;;

update_devfreq)
#
# Determine if the update_devfreq() function is present.
#
# Added by commit b596d895fa29 ("PM / devfreq: Make update_devfreq()
# public") in v4.20
#
CODE="
#include <linux/devfreq.h>
void conftest_update_devfreq(void)
{
update_devfreq();
}
"
compile_check_conftest "$CODE" "NV_UPDATE_DEVFREQ_PRESENT" "" "functions"
;;

devfreq_dev_profile_has_is_cooling_device)
#
# Determine if the 'devfreq_dev_profile' structure has 'is_cooling_device'.
#
# Added by commit 1224451bb6f93 ("PM / devfreq: Register devfreq as a cooling device
# on demand") in v5.12-rc1
#
CODE="
#include <linux/devfreq.h>
int conftest_devfreq_dev_profile_has_is_cooling_device(void) {
return offsetof(struct devfreq_dev_profile, is_cooling_device);
}
"
compile_check_conftest "$CODE" "NV_DEVFREQ_DEV_PROFILE_HAS_IS_COOLING_DEVICE" "" "types"
;;

devfreq_has_freq_table)
#
# Determine if the 'devfreq' structure has 'freq_table'
@@ -3729,6 +3747,38 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DEVFREQ_HAS_FREQ_TABLE" "" "types"
;;

devfreq_has_suspend_freq)
#
# Determine if the 'devfreq' structure has 'suspend_freq'.
#
# Commit 83f8ca45afbf ("PM / devfreq: add support for
# suspend/resume of a devfreq device") updated the devfreq
# structure and added the suspend_freq field in v5.0.
#
CODE="
#include <linux/devfreq.h>
int conftest_devfreq_has_suspend_freq(void) {
return offsetof(struct devfreq, suspend_freq);
}
"
compile_check_conftest "$CODE" "NV_DEVFREQ_HAS_SUSPEND_FREQ" "" "types"
;;

bpmp_mrq_has_strap_set)
#
# Determine if STRAP_SET is present in the bpmp MRQ ABI.
#
# STRAP_SET was added by commit 4bef358c9071 ("soc/tegra:
# bpmp: Update ABI header") in v5.0.
#
CODE="
#include <stdint.h>
#include <soc/tegra/bpmp-abi.h>
int bpmp_mrq_has_strap = STRAP_SET;
"
compile_check_conftest "$CODE" "NV_BPMP_MRQ_HAS_STRAP_SET" "" "types"
;;

dma_resv_add_fence)
#
# Determine if the dma_resv_add_fence() function is present.
@@ -3917,6 +3967,27 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT" "" "functions"
;;

pci_resize_resource_has_exclude_bars_arg)
#
# Determine if pci_resize_resource() has an exclude_bars argument.
#
# The exclude_bars argument was added to pci_resize_resource() by commit
# 337b1b566db0 (11/14/2025) ("PCI: Fix restoring BARs on BAR resize rollback path")
# in linux-next.
#
CODE="
#include <linux/pci.h>

typeof(pci_resize_resource) conftest_pci_resize_resource_has_exclude_bars_arg;
int __must_check conftest_pci_resize_resource_has_exclude_bars_arg(struct pci_dev *dev,
int i, int size,
int exclude_bars) {
return 0;
}"

compile_check_conftest "$CODE" "NV_PCI_RESIZE_RESOURCE_HAS_EXCLUDE_BARS_ARG" "" "types"
;;
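
Callers can tolerate both signatures with a thin wrapper; a sketch under the assumption that passing 0 for exclude_bars preserves the old behavior (the wrapper name is hypothetical):

    /* Sketch: resize a BAR across both pci_resize_resource() signatures. */
    static int nv_resize_bar(struct pci_dev *pdev, int bar, int size)
    {
    #if defined(NV_PCI_RESIZE_RESOURCE_HAS_EXCLUDE_BARS_ARG)
        return pci_resize_resource(pdev, bar, size, 0 /* exclude_bars */);
    #else
        return pci_resize_resource(pdev, bar, size);
    #endif
    }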

drm_connector_has_override_edid)
#
# Determine if 'struct drm_connector' has an 'override_edid' member.
@@ -3955,22 +4026,39 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG" "" "types"
;;

vm_area_struct_has_const_vm_flags)
vm_flags_set)
#
# Determine if the 'vm_area_struct' structure has
# const 'vm_flags'.
# Determine if the vm_flags_set() function is present. The
# presence of this function indicates that the vm_flags_clear()
# function is also present.
#
# A union of '__vm_flags' and 'const vm_flags' was added by
# The functions vm_flags_set()/vm_flags_clear() were added by
# commit bc292ab00f6c ("mm: introduce vma->vm_flags wrapper
# functions") in v6.3.
# functions") in v6.3-rc1 (2023-02-09).
#
CODE="
#include <linux/mm_types.h>
int conftest_vm_area_struct_has_const_vm_flags(void) {
return offsetof(struct vm_area_struct, __vm_flags);
#include <linux/mm.h>
void conftest_vm_flags_set(void) {
vm_flags_set();
}"

compile_check_conftest "$CODE" "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" "" "types"
compile_check_conftest "$CODE" "NV_VM_FLAGS_SET_PRESENT" "" "functions"
;;
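
Since v6.3, vma->vm_flags is const-qualified in struct vm_area_struct, so drivers typically wrap the flag update; a minimal sketch (the wrapper name is hypothetical):

    /* Sketch: set VMA flags on either side of the v6.3 vm_flags change. */
    static inline void nv_vm_flags_set(struct vm_area_struct *vma,
                                       vm_flags_t flags)
    {
    #if defined(NV_VM_FLAGS_SET_PRESENT)
        vm_flags_set(vma, flags);   /* v6.3+: modify via the kernel wrapper */
    #else
        vma->vm_flags |= flags;     /* pre-v6.3: direct modification */
    #endif
    }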

vma_flags_set_word)
#
# Determine if the vma_flags_set_word() function is present.
#
# Added by commit c3f7c506e8f1 ("mm: introduce VMA flags bitmap type")
# in v6.19-rc1.
#
CODE="
#include <linux/mm.h>
void conftest_vma_flags_set_word(void) {
vma_flags_set_word();
}"

compile_check_conftest "$CODE" "NV_VMA_FLAGS_SET_WORD_PRESENT" "" "functions"
;;

drm_driver_has_dumb_destroy)
@@ -4856,6 +4944,46 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;

register_shrinker_has_format_arg)
#
# Determine if register_shrinker() takes a printf-style 'fmt'
# argument naming the shrinker.
#
# The 'fmt' argument was added by commit e33c267ab70d ("mm:
# shrinkers: provide shrinkers with names") in v6.0.
#
CODE="
#include <linux/mm.h>

void conftest_register_shrinker_has_format_arg(void) {
register_shrinker(NULL, \"%d\", 0);
}"

compile_check_conftest "$CODE" "NV_REGISTER_SHRINKER_HAS_FMT_ARG" "" "types"
;;

shrinker_alloc)
#
# Determine if the shrinker_alloc() function is present.
#
# Added by commit c42d50aefd17 ("mm: shrinker: add infrastructure
# for dynamically allocating shrinker") in v6.7.
#
CODE="
#include <linux/mm.h>

void conftest_shrinker_alloc(void) {
shrinker_alloc();
}"

compile_check_conftest "$CODE" "NV_SHRINKER_ALLOC_PRESENT" "" "functions"
;;
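
Together with the register_shrinker() test above, this lets a driver span the v6.7 shrinker API rework; a sketch with placeholder callbacks and a hypothetical demo_shrinker object:

    /* Sketch: register a shrinker across three API generations. */
    #if defined(NV_SHRINKER_ALLOC_PRESENT)
        struct shrinker *s = shrinker_alloc(0, "nv-demo");
        if (s != NULL) {
            s->count_objects = demo_count_objects;
            s->scan_objects = demo_scan_objects;
            shrinker_register(s);               /* v6.7+: dynamic allocation */
        }
    #elif defined(NV_REGISTER_SHRINKER_HAS_FMT_ARG)
        register_shrinker(&demo_shrinker, "nv-demo");   /* v6.0+: named */
    #else
        register_shrinker(&demo_shrinker);              /* pre-v6.0 */
    #endif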

memory_device_coherent_present)
#
# Determine if MEMORY_DEVICE_COHERENT support is present or not

@@ -2,6 +2,9 @@
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drm_hdcp.h \
drm/display/drm_hdcp.h \
drm/display/drm_hdcp_helper.h \
drm/drmP.h \
drm/drm_aperture.h \
drm/drm_atomic_state_helper.h \

@@ -22,7 +22,6 @@
*/

#include "nv-kthread-q.h"
#include "nv-list-helpers.h"

#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.

#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif

#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,8 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef _UAPI_NVIDIA_DRM_IOCTL_H_
#define _UAPI_NVIDIA_DRM_IOCTL_H_
#ifndef _NV_DRM_COMMON_IOCTL_H_
#define _NV_DRM_COMMON_IOCTL_H_

#include <drm/drm.h>

@@ -396,4 +396,4 @@ struct drm_nvidia_get_drm_file_unique_id_params {
uint64_t id; /* OUT Unique ID of the DRM file */
};

#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
#endif /* _NV_DRM_COMMON_IOCTL_H_ */
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,8 +31,8 @@
#include "nvidia-drm-encoder.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-fb.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-format.h"
#include "nv_drm_common_ioctl.h"

#include "nvmisc.h"
#include "nv_common_utils.h"
@@ -1286,15 +1286,10 @@ plane_req_config_update(struct drm_plane *plane,
if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
nv_drm_format_is_yuv(plane_state->fb->format->format)) {

if (nv_plane->supportsColorProperties) {
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
} else {
req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
}
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
} else {
#endif
@@ -1559,7 +1554,7 @@ static int __nv_drm_cursor_atomic_check(struct drm_plane *plane,

WARN_ON(nv_plane->layer_idx != NVKMS_KAPI_LAYER_INVALID_IDX);

nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state);
struct NvKmsKapiHeadRequestedConfig *head_req_config =
&nv_crtc_state->req_config;
@@ -1605,7 +1600,7 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,

WARN_ON(nv_plane->layer_idx == NVKMS_KAPI_LAYER_INVALID_IDX);

nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state);
struct NvKmsKapiHeadRequestedConfig *head_req_config =
&nv_crtc_state->req_config;
@@ -2435,7 +2430,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,

req_config->flags.displaysChanged = NV_TRUE;

nv_drm_for_each_connector_in_state(crtc_state->state,
for_each_new_connector_in_state(crtc_state->state,
connector, connector_state, j) {
if (connector_state->crtc != crtc) {
continue;
@@ -2844,26 +2839,16 @@ nv_drm_plane_create(struct drm_device *dev,
}

#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if (pResInfo->caps.supportsInputColorSpace &&
pResInfo->caps.supportsInputColorRange) {

nv_plane->supportsColorProperties = true;

drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
} else {
nv_plane->supportsColorProperties = false;
}
#else
nv_plane->supportsColorProperties = false;
drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
#endif

drm_plane_helper_add(plane, &nv_plane_helper_funcs);

@@ -191,13 +191,6 @@ struct nv_drm_plane {
*/
uint32_t layer_idx;

/**
* @supportsColorProperties
*
* If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
*/
bool supportsColorProperties;

struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,7 +40,7 @@

#if defined(NV_DRM_AVAILABLE)

#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"

#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
@@ -326,8 +326,8 @@ done:

static int nv_drm_disp_cmp (const void *l, const void *r)
{
struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;
const struct nv_drm_mst_display_info *l_info = (const struct nv_drm_mst_display_info *)l;
const struct nv_drm_mst_display_info *r_info = (const struct nv_drm_mst_display_info *)r;

return strcmp(l_info->dpAddress, r_info->dpAddress);
}
@@ -743,6 +743,8 @@ static int nv_drm_dev_load(struct drm_device *dev)

nv_dev->hasVideoMemory = resInfo.caps.hasVideoMemory;

nv_dev->contiguousPhysicalMappings = resInfo.caps.contiguousPhysicalMappings;

nv_dev->genericPageKind = resInfo.caps.genericPageKind;

// Fermi-Volta use generation 0, Turing+ uses generation 2.
@@ -762,8 +764,6 @@ static int nv_drm_dev_load(struct drm_device *dev)
resInfo.caps.numDisplaySemaphores;
nv_dev->display_semaphores.next_index = 0;

nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;

nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;

@@ -1717,6 +1717,11 @@ static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return retcode;
}

static int nv_drm_load_noop(struct drm_device *dev, unsigned long flags)
{
return 0;
}

static const struct file_operations nv_drm_fops = {
.owner = THIS_MODULE,

@@ -1899,6 +1904,8 @@ static struct drm_driver nv_drm_driver = {
.gem_prime_res_obj = nv_drm_gem_prime_res_obj,
#endif

.load = nv_drm_load_noop,

.postclose = nv_drm_postclose,
.open = nv_drm_open,

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,12 +25,12 @@
#if defined(NV_DRM_AVAILABLE)

#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-fb.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-format.h"
#include "nv_drm_common_ioctl.h"

#include <drm/drm_crtc_helper.h>

@@ -29,10 +29,10 @@
#endif

#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-fence.h"
#include "nvidia-dma-resv-helper.h"
#include "nv_drm_common_ioctl.h"

#include <linux/dma-fence.h>

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,15 +35,22 @@

static const u32 nvkms_to_drm_format[] = {
/* RGB formats */
[NvKmsSurfaceMemoryFormatA1R5G5B5] = DRM_FORMAT_ARGB1555,
[NvKmsSurfaceMemoryFormatX1R5G5B5] = DRM_FORMAT_XRGB1555,
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
[NvKmsSurfaceMemoryFormatA1R5G5B5] = DRM_FORMAT_ARGB1555,
[NvKmsSurfaceMemoryFormatX1R5G5B5] = DRM_FORMAT_XRGB1555,
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
#if defined(DRM_FORMAT_ABGR16161616)
/*
* DRM_FORMAT_ABGR16161616 was introduced by Linux kernel commit
* ff92ecf575a92 (v5.14).
*/
[NvKmsSurfaceMemoryFormatR16G16B16A16] = DRM_FORMAT_ABGR16161616,
#endif
#if defined(DRM_FORMAT_ABGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
#endif

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,7 +33,7 @@
#include <drm/drm_drv.h>

#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"

#include "linux/dma-buf.h"

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,7 @@

#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"

#include <drm/drm_drv.h>
#include <drm/drm_prime.h>
@@ -161,6 +161,21 @@ static int __nv_drm_gem_nvkms_map(
goto done;
}

/*
* XXX Physical mapping currently broken in cases where we can't guarantee
* that the mapping is contiguous. Fail on platforms that don't have
* guaranteed contiguous physical mappings.
*/
if (!nv_dev->contiguousPhysicalMappings) {
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Mapping vidmem NvKmsKapiMemory 0x%p is currently "
"unsupported on coherent GPU memory configurations",
pMemory);
ret = -ENOMEM;
goto done;
}

if (!nvKms->mapMemory(nv_dev->pDevice,
pMemory,
NVKMS_KAPI_MAPPING_TYPE_USER,

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,7 +28,7 @@

#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"

#include "linux/dma-buf.h"
#include "linux/mm.h"

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,7 +25,6 @@
#if defined(NV_DRM_AVAILABLE)

#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-fence.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-gem-nvkms-memory.h"
@@ -34,6 +33,7 @@
#include "nvidia-drm-helper.h"
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nv_drm_common_ioctl.h"

#include <drm/drm_drv.h>
#include <drm/drm_prime.h>

@@ -54,7 +54,7 @@
* drm_atomic_helper_disable_all() is copied from
* linux/drivers/gpu/drm/drm_atomic_helper.c and modified to use
* nv_drm_for_each_crtc instead of drm_for_each_crtc to loop over all crtcs,
* use nv_drm_for_each_*_in_state instead of for_each_connector_in_state to loop
* use for_each_new_*_in_state instead of for_each_connector_in_state to loop
* over all modeset object states, and use drm_atomic_state_free() if
* drm_atomic_state_put() is not available.
*
@@ -139,13 +139,13 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
plane_state->rotation = DRM_MODE_ROTATE_0;
}

nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
for_each_new_connector_in_state(state, conn, conn_state, i) {
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
if (ret < 0)
goto free;
}

nv_drm_for_each_plane_in_state(state, plane, plane_state, i) {
for_each_new_plane_in_state(state, plane, plane_state, i) {
ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
if (ret < 0)
goto free;

@@ -138,154 +138,6 @@ nv_drm_prime_pages_to_sg(struct drm_device *dev,
int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
struct drm_modeset_acquire_ctx *ctx);

/*
* for_each_connector_in_state(), for_each_crtc_in_state() and
* for_each_plane_in_state() were added by kernel commit
* df63b9994eaf942afcdb946d27a28661d7dfbf2a which was Signed-off-by:
* Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* for_each_connector_in_state(), for_each_crtc_in_state() and
* for_each_plane_in_state() were copied from
* include/drm/drm_atomic.h @
* 21a01abbe32a3cbeb903378a24e504bfd9fe0648
* which has the following copyright and license information:
*
* Copyright (C) 2014 Red Hat
* Copyright (C) 2014 Intel Corp.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Rob Clark <robdclark@gmail.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*/

/**
* nv_drm_for_each_connector_in_state - iterate over all connectors in an
* atomic update
* @__state: &struct drm_atomic_state pointer
* @connector: &struct drm_connector iteration cursor
* @connector_state: &struct drm_connector_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all connectors in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state()), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_connector_in_state)
#define nv_drm_for_each_connector_in_state(__state, \
connector, connector_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->num_connector && \
((connector) = (__state)->connectors[__i].ptr, \
(connector_state) = (__state)->connectors[__i].state, 1); \
(__i)++) \
for_each_if (connector)
#else
#define nv_drm_for_each_connector_in_state(__state, \
connector, connector_state, __i) \
for_each_connector_in_state(__state, connector, connector_state, __i)
#endif

/**
* nv_drm_for_each_crtc_in_state - iterate over all CRTCs in an atomic update
* @__state: &struct drm_atomic_state pointer
* @crtc: &struct drm_crtc iteration cursor
* @crtc_state: &struct drm_crtc_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all CRTCs in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state()), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_crtc_in_state)
#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->dev->mode_config.num_crtc && \
((crtc) = (__state)->crtcs[__i].ptr, \
(crtc_state) = (__state)->crtcs[__i].state, 1); \
(__i)++) \
for_each_if (crtc_state)
#else
#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \
for_each_crtc_in_state(__state, crtc, crtc_state, __i)
#endif

/**
* nv_drm_for_each_plane_in_state - iterate over all planes in an atomic update
* @__state: &struct drm_atomic_state pointer
* @plane: &struct drm_plane iteration cursor
* @plane_state: &struct drm_plane_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all planes in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state()), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_plane_in_state)
#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->dev->mode_config.num_total_plane && \
((plane) = (__state)->planes[__i].ptr, \
(plane_state) = (__state)->planes[__i].state, 1); \
(__i)++) \
for_each_if (plane_state)
#else
#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \
for_each_plane_in_state(__state, plane, plane_state, __i)
#endif

/*
* for_each_new_plane_in_state() was added by kernel commit
* 581e49fe6b411f407102a7f2377648849e0fa37f which was Signed-off-by:
* Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* This commit also added the old_state and new_state pointers to
* __drm_planes_state. Because of this, the best that can be done on kernel
* versions without this macro is for_each_plane_in_state.
*/

/**
* nv_drm_for_each_new_plane_in_state - iterate over all planes in an atomic update
* @__state: &struct drm_atomic_state pointer
* @plane: &struct drm_plane iteration cursor
* @new_plane_state: &struct drm_plane_state iteration cursor for the new state
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all planes in an atomic update, tracking only the new
* state. This is useful in enable functions, where we need the new state the
* hardware should be in when the atomic commit operation has completed.
*/
#if !defined(for_each_new_plane_in_state)
#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \
nv_drm_for_each_plane_in_state(__state, plane, new_plane_state, __i)
#else
#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \
for_each_new_plane_in_state(__state, plane, new_plane_state, __i)
#endif
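
On kernels new enough to drop this compatibility layer, the change above replaces the wrappers with the kernel's own iterators; the usage is simply:

    /* Iterate the new object states directly with the kernel macro. */
    struct drm_plane *plane;
    struct drm_plane_state *new_plane_state;
    int i;

    for_each_new_plane_in_state(state, plane, new_plane_state, i) {
        /* new_plane_state is the state the hardware should reach
         * once this atomic commit completes. */
    }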

#include <drm/drm_auth.h>
#include <drm/drm_file.h>

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015, 2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -108,8 +108,11 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
return false;
}

/* Find out whether primary & overlay flip done events will be generated. */
nv_drm_for_each_plane_in_state(old_crtc_state->state,
/*
* Find out whether primary & overlay flip done events will be generated.
* Only called after drm_atomic_helper_swap_state, so we use old state.
*/
for_each_old_plane_in_state(old_crtc_state->state,
plane, old_plane_state, i) {
if (old_plane_state->crtc != crtc) {
continue;
@@ -193,7 +196,7 @@ static int __nv_drm_convert_in_fences(
return 0;
}

nv_drm_for_each_new_plane_in_state(state, plane, plane_state, i) {
for_each_new_plane_in_state(state, plane, plane_state, i) {
if ((plane->type == DRM_PLANE_TYPE_CURSOR) ||
(plane_state->crtc != crtc) ||
(plane_state->fence == NULL)) {
@@ -334,7 +337,8 @@ static int __nv_drm_get_syncpt_data(

head_reply_config = &reply_config->headReplyConfig[nv_crtc->head];

nv_drm_for_each_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) {
/* Use old state because this is only called after drm_atomic_helper_swap_state */
for_each_old_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) {
struct nv_drm_plane *nv_plane = to_nv_plane(plane);

if (plane->type == DRM_PLANE_TYPE_CURSOR || old_plane_state->crtc != crtc) {
@@ -395,7 +399,7 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
&(to_nv_atomic_state(state)->config);
struct NvKmsKapiModeSetReplyConfig reply_config = { };
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
int i;
int ret;

@@ -429,18 +433,10 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
memset(requested_config, 0, sizeof(*requested_config));

/* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/*
* When committing a state, the new state is already stored in
* crtc->state. When checking a proposed state, the proposed state is
* stored in crtc_state.
*/
struct drm_crtc_state *new_crtc_state =
commit ? crtc->state : crtc_state;
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);

if (commit) {
struct drm_crtc_state *old_crtc_state = crtc_state;
struct nv_drm_crtc_state *nv_new_crtc_state =
to_nv_crtc_state(new_crtc_state);

@@ -497,20 +493,17 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
}

if (commit && nv_dev->supportsSyncpts) {
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/* commit is true so we check old state */
for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
/*! loop over affected crtcs and get NvKmsKapiModeSetReplyConfig */
ret = __nv_drm_get_syncpt_data(
nv_dev, crtc, crtc_state, requested_config, &reply_config);
nv_dev, crtc, old_crtc_state, requested_config, &reply_config);
if (ret != 0) {
return ret;
}
}
}

if (commit && nv_dev->requiresVrrSemaphores && reply_config.vrrFlip) {
nvKms->signalVrrSemaphore(nv_dev->pDevice, reply_config.vrrSemaphoreIndex);
}

return 0;
}

@@ -523,12 +516,38 @@ int nv_drm_atomic_check(struct drm_device *dev,
struct drm_crtc_state *crtc_state;
int i;

nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
struct drm_plane *plane;
struct drm_plane_state *plane_state;
int j;
bool cursor_surface_changed;
bool cursor_only_commit;

for_each_new_crtc_in_state(state, crtc, crtc_state, i) {

/*
* Committing cursor surface change without any other plane change can
* cause cursor surface in use by HW to be freed prematurely. Add all
* planes to the commit to avoid this. This is a workaround for bug 4966645.
*/
cursor_surface_changed = false;
cursor_only_commit = true;
for_each_new_plane_in_state(crtc_state->state, plane, plane_state, j) {
if (plane->type == DRM_PLANE_TYPE_CURSOR) {
if (plane_state->fb != plane->state->fb) {
cursor_surface_changed = true;
}
} else {
cursor_only_commit = false;
break;
}
}

/*
* if the color management changed on the crtc, we need to update the
* crtc's plane's CSC matrices, so add the crtc's planes to the commit
*/
if (crtc_state->color_mgmt_changed) {
if (crtc_state->color_mgmt_changed ||
(cursor_surface_changed && cursor_only_commit)) {
if ((ret = drm_atomic_add_affected_planes(state, crtc)) != 0) {
goto done;
}
@@ -619,7 +638,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,
* Our system already implements such a queue, but due to
* bug 4054608, it is currently not used.
*/
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);

/*
@@ -726,7 +745,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,
goto done;
}

nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
for_each_old_crtc_in_state(state, crtc, crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
struct nv_drm_crtc_state *nv_new_crtc_state =
to_nv_crtc_state(crtc->state);

@@ -30,6 +30,7 @@
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_print.h>

#include <drm/drm_device.h>
#include <drm/drm_gem.h>
@@ -141,8 +142,9 @@ struct nv_drm_device {

NvBool hasVideoMemory;

NvBool contiguousPhysicalMappings;

NvBool supportsSyncpts;
NvBool requiresVrrSemaphores;
NvBool subOwnershipGranted;
NvBool hasFramebufferConsole;

@@ -62,6 +62,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_property_event
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_status_event
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_flags_set

NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
@@ -91,7 +94,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fenc
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg

@@ -22,7 +22,6 @@
*/

#include "nv-kthread-q.h"
#include "nv-list-helpers.h"

#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.

#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif

#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

@@ -130,6 +130,15 @@ module_param_named(config_file, nvkms_conf, charp, 0400);

static atomic_t nvkms_alloc_called_count;

#define NV_SUPPORTS_PLATFORM_DEVICE_PUT NV_IS_EXPORT_SYMBOL_GPL_platform_device_put

#if defined(NV_LINUX_NVHOST_H_PRESENT) && NV_SUPPORTS_PLATFORM_DEVICE_PUT
#if defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) || defined(CONFIG_TEGRA_GRHOST)
#define NVKMS_NVHOST_SYNCPT_SUPPORTED
struct platform_device *nvhost_platform_device = NULL;
#endif
#endif

NvBool nvkms_test_fail_alloc_core_channel(
enum FailAllocCoreChannelMethod method
)
@@ -206,21 +215,18 @@ NvBool nvkms_kernel_supports_syncpts(void)
* support for syncpts; callers must also check that the hardware
* supports syncpts.
*/
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
return NV_TRUE;
#else
return NV_FALSE;
#endif
}

#define NVKMS_SYNCPT_STUBS_NEEDED

/*************************************************************************
* NVKMS interface for nvhost unit for sync point APIs.
*************************************************************************/
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)

#undef NVKMS_SYNCPT_STUBS_NEEDED
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED) && defined(CONFIG_TEGRA_GRHOST)

#include <linux/nvhost.h>

@@ -228,17 +234,21 @@ NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct platform_device *pdev = nvhost_get_default_device();
if (nvhost_platform_device == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get default nvhost device");
return NV_FALSE;
}

switch (op) {

case NVKMS_SYNCPT_OP_ALLOC:
params->alloc.id = nvhost_get_syncpt_client_managed(
pdev, params->alloc.syncpt_name);
nvhost_platform_device, params->alloc.syncpt_name);
break;

case NVKMS_SYNCPT_OP_PUT:
nvhost_syncpt_put_ref_ext(pdev, params->put.id);
nvhost_syncpt_put_ref_ext(nvhost_platform_device, params->put.id);
break;

case NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH: {
@@ -272,7 +282,7 @@ NvBool nvkms_syncpt_op(

case NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD:
nvhost_syncpt_create_fence_single_ext(
pdev,
nvhost_platform_device,
params->id_and_thresh_to_fd.id,
params->id_and_thresh_to_fd.thresh,
"nvkms-fence",
@@ -281,7 +291,7 @@ NvBool nvkms_syncpt_op(

case NVKMS_SYNCPT_OP_READ_MINVAL:
params->read_minval.minval =
nvhost_syncpt_read_minval(pdev, params->read_minval.id);
nvhost_syncpt_read_minval(nvhost_platform_device, params->read_minval.id);
break;

}
@@ -289,7 +299,7 @@ NvBool nvkms_syncpt_op(
return NV_TRUE;
}

#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) && defined(NV_LINUX_NVHOST_H_PRESENT)
#elif defined(NVKMS_NVHOST_SYNCPT_SUPPORTED) && defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)

#include <linux/dma-fence.h>
#include <linux/file.h>
@@ -305,24 +315,20 @@ NvBool nvkms_syncpt_op(

#include <linux/nvhost.h>

#undef NVKMS_SYNCPT_STUBS_NEEDED

NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct host1x_syncpt *host1x_sp;
struct platform_device *pdev;
struct host1x *host1x;

pdev = nvhost_get_default_device();
if (pdev == NULL) {
if (nvhost_platform_device == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get nvhost default pdev");
return NV_FALSE;
"Failed to get default nvhost device");
return NV_FALSE;
}

host1x = nvhost_get_host1x(pdev);
host1x = nvhost_get_host1x(nvhost_platform_device);
if (host1x == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get host1x");
@@ -436,9 +442,7 @@ NvBool nvkms_syncpt_op(

return NV_TRUE;
}
#endif

#ifdef NVKMS_SYNCPT_STUBS_NEEDED
#else
/* Unsupported STUB for nvkms_syncpt APIs */
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
@@ -2091,6 +2095,14 @@ static int __init nvkms_init(void)

atomic_set(&nvkms_alloc_called_count, 0);

#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
/*
* nvhost_get_default_device() might return NULL; don't check it
* until we use it.
*/
nvhost_platform_device = nvhost_get_default_device();
#endif

ret = nvkms_alloc_rm();

if (ret != 0) {
@@ -2152,6 +2164,10 @@ static void __exit nvkms_exit(void)
struct nvkms_timer_t *timer, *tmp_timer;
unsigned long flags = 0;

#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
platform_device_put(nvhost_platform_device);
#endif
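
The two hunks above implement a simple lifetime pattern: resolve the nvhost device once at module init and drop the reference once at exit. A condensed sketch of just that pattern (function names are placeholders; platform_device_put() tolerates a NULL argument):

    static int __init demo_init(void)
    {
        /* May be NULL; users of nvhost_platform_device check before use. */
        nvhost_platform_device = nvhost_get_default_device();
        return 0;
    }

    static void __exit demo_exit(void)
    {
        platform_device_put(nvhost_platform_device);
    }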

nvkms_proc_exit();

down(&nvkms_lock);

@@ -107,6 +107,7 @@ typedef struct {
enum FailAllocCoreChannelMethod {
FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
FAIL_ALLOC_CORE_CHANNEL_NO_CLASS = 2,
};

NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);

@@ -100,4 +100,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_platform_device_put

@@ -159,9 +159,6 @@ struct nvidia_p2p_page_table {
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
* This API does not support Coherent Driver-based Memory Management (CDMM) mode.
* CDMM allows coherent GPU memory to be managed by the driver and not the OS.
* This is done by the driver not onlining the memory as a NUMA node.
*
* This API may sleep.
*

@@ -30,9 +30,11 @@
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
@@ -40,6 +42,7 @@
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
@@ -66,6 +69,7 @@
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)

@@ -46,4 +46,8 @@

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B (0x0000000B)
/* valid ARCHITECTURE_GB20x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB206 (0x00000006)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB207 (0x00000007)

#endif /* _ctrl2080mc_h_ */

@@ -1,152 +1,28 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __ga100_dev_runlist_h__
#define __ga100_dev_runlist_h__
/* This file is autogenerated. Do not edit */
#define NV_RUNLIST 0x000003ff:0x00000000 /* RW--D */
#define NV_CHRAM 0x00001fff:0x00000000 /* RW--D */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK 0x040 /* RW-4R */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 8 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE 0x044 /* RW-4R */
#define NV_RUNLIST_INT_CYA_SPARE__PRIV_LEVEL_MASK 0x040 /* */
#define NV_RUNLIST_INT_CYA_SPARE_DATA 31:0 /* RWIUF */
#define NV_RUNLIST_INT_CYA_SPARE_DATA_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE 0:0 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL 1:1 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_1MTHD 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_2MTHD 0x00000001 /* */
#define NV_RUNLIST_CONFIG 0x000 /* RW-4R */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH 0:0 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_WEAK 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_STRONG 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH 4:4 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_WEAK 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_STRONG 0x00000001 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT 9:8 /* RWIVF */
#define NV_RUNLIST_CONFIG_L2_EVICT_FIRST 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT_NORMAL 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_L2_EVICT_LAST 0x00000002 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4 10:10 /* RWXVF */
#define NV_RUNLIST_CONFIG_SUBCH4_INACTIVE 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4_ACTIVE 0x00000001 /* RW--V */
#define NV_RUNLIST_CHANNEL_CONFIG 0x004 /* R--4R */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2 3:0 /* C--UF */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2_2K 11 /* C---V */
#define NV_RUNLIST_CHANNEL_CONFIG_CHRAM_BAR0_OFFSET 31:4 /* R-XVF */
#define NV_RUNLIST_DOORBELL_CONFIG 0x008 /* R--4R */
#define NV_RUNLIST_DOORBELL_CONFIG_ID 31:16 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG 0x00C /* R--4R */
#define NV_RUNLIST_FB_CONFIG_FB_THREAD_ID 7:0 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG_ESC_ID 15:8 /* R-XVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG(i) (0x300+(i)*4) /* RW-4A */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG__SIZE_1 64 /* */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK 11:0 /* */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW 10:0 /* RWIVF */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW_INIT 2047 /* RWI-V */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET 27:16 /* */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW 26:16 /* RWIVF */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW_INIT 0x0 /* RWI-V */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE 31:31 /* RWIVF */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_TRUE 1 /* RW--V */
|
||||
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_FALSE 0 /* RWI-V */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG(i) (0x010+(i)*4) /* R--4A */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_ID 7:0 /* R-XUF */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_BAR0_OFFSET 25:10 /* R-XUF */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG_VALID 31:31 /* R-XUF */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_PBDMA_CONFIG_VALID_FALSE 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_ACQ_PRETEST 0x020 /* RW-4R */
|
||||
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT 7:0 /* RWIUF */
|
||||
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT_8 0x00000008 /* RWI-V */
|
||||
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE 15:12 /* RWIUF */
|
||||
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_0 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_10 0x0000000a /* RW--V */
|
||||
#define NV_RUNLIST_IDLE_FILTER 0x024 /* RW-4R */
|
||||
#define NV_RUNLIST_IDLE_FILTER_PERIOD 7:0 /* RWIUF */
|
||||
#define NV_RUNLIST_IDLE_FILTER_PERIOD_INIT 0x00000050 /* RWI-V */
|
||||
#define NV_RUNLIST_IDLE_FILTER_PERIOD__PROD 0x00000064 /* RW--V */
|
||||
#define NV_RUNLIST_IDLE_FILTER_PERIOD_8 0x00000008 /* RW--V */
|
||||
#define NV_RUNLIST_IDLE_FILTER_PERIOD_32 0x00000020 /* RW--V */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK 0x028 /* RW-4R */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMER 7:0 /* RWIUF */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_DISABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_SHORT 0x00000003 /* RW--V */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_100US 0x00000064 /* RWI-V */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE 15:12 /* RWIUF */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_0 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_SHORT 0x00000000 /* */
|
||||
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_100US 0x00000000 /* */
|
||||
#define NV_RUNLIST_ESCHED_CONFIG 0x02c /* C--4R */
|
||||
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID 15:0 /* C--UF */
|
||||
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID_VALUE 50543 /* C---V */
|
||||
#define NV_CHRAM_CHANNEL(i) (0x000+(i)*4) /* RW-4A */
|
||||
#define NV_CHRAM_CHANNEL__SIZE_1 2048 /* */
|
||||
#define NV_CHRAM_CHANNEL_WRITE_CONTROL 0:0 /* -WIVF */
|
||||
@@ -188,22 +64,6 @@
|
||||
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL 12:12 /* RWIVF */
|
||||
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_FALSE 0x00000000 /* RWI-V */
|
||||
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_CHRAM_CHANNEL_STATUS 12:8 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_IDLE 0x00000000 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PENDING 0x00000001 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PENDING_CTX_RELOAD 0x00000003 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL 0x00000011 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x00000013 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY 0x00000004 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_AND_ENG_BUSY 0x0000000C /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY 0x00000008 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL 0x00000019 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING 0x00000009 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_CTX_RELOAD 0x00000006 /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_ENG_BUSY_CTX_RELOAD 0x0000000E /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_CTX_RELOAD 0x0000000A /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_CTX_RELOAD 0x0000000B /* */
|
||||
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x0000001B /* */
|
||||
#define NV_CHRAM_CHANNEL_UPDATE 31:0 /* */
|
||||
#define NV_CHRAM_CHANNEL_UPDATE_ENABLE_CHANNEL 0x00000002 /* */
|
||||
#define NV_CHRAM_CHANNEL_UPDATE_DISABLE_CHANNEL 0x00000003 /* */
|
||||
@@ -211,461 +71,10 @@
|
||||
#define NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED 0x00000011 /* */
|
||||
#define NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED 0x00000021 /* */
|
||||
#define NV_CHRAM_CHANNEL_UPDATE_CLEAR_CHANNEL 0xFFFFFFFF /* */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO 0x080 /* RW-4R */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO 31:12 /* RWIUF */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO_NULL 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET 1:0 /* RWIVF */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_VID_MEM 0x0 /* RWI-V */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_COHERENT 0x2 /* RW--V */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_NONCOHERENT 0x3 /* RW--V */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_ALIGN_SHIFT 12 /* */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_HI 0x084 /* RW-4R */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI 7:0 /* RWIUF */
|
||||
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI_NULL 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_SUBMIT 0x088 /* RW-4R */
|
||||
#define NV_RUNLIST_SUBMIT_LENGTH 15:0 /* RWIUF */
|
||||
#define NV_RUNLIST_SUBMIT_LENGTH_ZERO 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_SUBMIT_LENGTH_MAX 0x0000ffff /* RW--V */
|
||||
#define NV_RUNLIST_SUBMIT_OFFSET 31:16 /* RWIVF */
|
||||
#define NV_RUNLIST_SUBMIT_OFFSET_ZERO 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_SUBMIT_INFO 0x08C /* R--4R */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID 13:0 /* */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW 10:0 /* R-IUF */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW_DEFAULT 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID 14:14 /* R-IUF */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PENDING 15:15 /* R-IVF */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PENDING_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PENDING_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET 31:16 /* R-IVF */
|
||||
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET_ZERO 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK(i) (0x190+(i)*4) /* RW-4A */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_VECTORID(i) (0x160+(i)*4) /* RW-4A */
|
||||
#define NV_RUNLIST_INTR_VECTORID__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_VECTORID__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
|
||||
#define NV_RUNLIST_INTR_VECTORID_VECTOR 11:0 /* RWXUF */
|
||||
#define NV_RUNLIST_INTR_VECTORID_GSP 30:30 /* RWIUF */
|
||||
#define NV_RUNLIST_INTR_VECTORID_GSP_DISABLE 0 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_VECTORID_GSP_ENABLE 1 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_VECTORID_CPU 31:31 /* RWIUF */
|
||||
#define NV_RUNLIST_INTR_VECTORID_CPU_DISABLE 0 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_VECTORID_CPU_ENABLE 1 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_RETRIGGER(i) (0x180+(i)*4) /* -W-4A */
|
||||
#define NV_RUNLIST_INTR_RETRIGGER__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_RETRIGGER__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
|
||||
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER 0:0 /* -W-VF */
|
||||
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER_TRUE 1 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0 0x100 /* RW-4R */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_NOT_PENDING 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_PENDING 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_RESET 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_NOT_PENDING 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_PENDING 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_RESET 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE 4:4 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE 5:5 /* RWXVF */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE 6:6 /* RWXVF */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWXVF */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_BAD_TSG 12:12 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_BAD_TSG_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_BAD_TSG_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_BAD_TSG_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0 16:16 /* R-XVF */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0 17:17 /* R-XVF */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1 18:18 /* R-XVF */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1 19:19 /* R-XVF */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
|
||||
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_NOT_PENDING 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_PENDING 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
|
||||
#define NV_RUNLIST_INTR_0_RUNLIST_EVENT 9:9 /* */
|
||||
#define NV_RUNLIST_INTR_0_MASK_SET 0x110 /* RW-4R */
|
||||
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_MASK_CLEAR 0x118 /* RW-4R */
|
||||
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE(i) (0x120+(i)*8) /* RW-4A */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG 12:12 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE(i) (0x140+(i)*8) /* RW-4A */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG 12:12 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO(i) (0x224+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID 13:0 /* R-IUF */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID_DEFAULT 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE 15:14 /* R-IUF */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_LOAD 0x00000001 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SAVE 0x00000002 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SWITCH 0x00000003 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID 29:16 /* R-IUF */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID_DEFAULT 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS 31:30 /* R-IUF */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_AWAITING_ACK 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ENG_WAS_RESET 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ACK_RECEIVED 0x00000002 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_DROPPED_TIMEOUT 0x00000003 /* R---V */
|
||||
#define NV_RUNLIST_INFO 0x108 /* R--4R */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM 0:0 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM 1:1 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_ARMED 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM 4:4 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM 5:5 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INFO_ENG_IDLE 8:8 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_ENG_IDLE_FALSE 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INFO_ENG_IDLE_TRUE 0x00000001 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_IDLE 9:9 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_IDLE_FALSE 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_IDLE_TRUE 0x00000001 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS 10:10 /* R-IVF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_IDLE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_BUSY 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING 12:12 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED 13:13 /* R-IUF */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG 0x174 /* R--4R */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE 3:0 /* R-IVF */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_NO_ERROR 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_ZERO_LENGTH_TSG 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_MAX_LENGTH_EXCEEDED 0x00000002 /* R---V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_RUNLIST_OVERFLOW 0x00000003 /* R---V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_CHID_ENTRY 0x00000004 /* R---V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_TSG_HEADER 0x00000005 /* R---V */
|
||||
#define NV_RUNLIST_INTR_BAD_TSG_CODE_INVALID_RUNQUEUE 0x00000006 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG(i) (0x220+(i)*64) /* RW-4A */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD 30:0 /* RWIVF */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_INIT 0x003fffff /* RWI-V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_MAX 0x7fffffff /* RW--V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION 31:31 /* RWIVF */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_DISABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_ENABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG 0x050 /* RW-4R */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT 5:0 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT__PROD 0x00000002 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_EN 6:6 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG_IDLE_CG_EN__PROD 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG_STATE_CG_EN 7:7 /* */
|
||||
#define NV_RUNLIST_BLKCG_STATE_CG_EN_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_STATE_CG_EN_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_STATE_CG_EN__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT 13:8 /* */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT_INIT 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT__PROD 0x00000002 /* */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_EN 14:14 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_EN_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_EN_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG_STALL_CG_EN__PROD 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN 15:15 /* */
|
||||
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN__PROD 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT 19:16 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT 23:20 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_INIT 0x0000000f /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_FULLSPEED 0x0000000f /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL 27:24 /* */
|
||||
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL_INIT 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_EN 28:28 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_EN__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER 29:29 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_EN 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_DIS 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN 30:30 /* */
|
||||
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_HALT_CG_EN 31:31 /* */
|
||||
#define NV_RUNLIST_BLKCG_HALT_CG_EN_ENABLED 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG_HALT_CG_EN_DISABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG_HALT_CG_EN__PROD 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG1 0x054 /* RW-4R */
|
||||
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN 0:0 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG 16:1 /* */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_ENABLED 0x00000000 /* */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_DISABLED 0x0000FFFF /* */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG__PROD 0x00000001 /* */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_RLP 1:1 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_RLP_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_RLP_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_RLP__PROD 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EVH 3:3 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EVH_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EVH_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EVH__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EISM 7:7 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EISM_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EISM_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_EISM__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_LB 8:8 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_LB_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_LB_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_LB__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL 9:9 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP 10:10 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB 11:11 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PRI 13:13 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PRI_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PRI_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_PRI__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_CHSW 14:14 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_CHSW__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_XBAR 15:15 /* RWIVF */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_ENABLED 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_DISABLED 0x00000001 /* RWI-V */
|
||||
#define NV_RUNLIST_BLKCG1_SLCG_XBAR__PROD 0x00000000 /* RW--V */
|
||||
#define NV_RUNLIST_SLCG_MISC 0x05C /* RW-4R */
|
||||
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS 3:0 /* RWIVF */
|
||||
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS_ZERO 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_INTERNAL_DOORBELL 0x090 /* -W-4R */
|
||||
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID 11:0 /* */
|
||||
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID_HW 10:0 /* -WXUF */
|
||||
#define NV_RUNLIST_INTERNAL_DOORBELL_GFID 21:16 /* -WXUF */
|
||||
#define NV_RUNLIST_SCHED_DISABLE 0x094 /* RW-4R */
|
||||
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST 0:0 /* RWIVF */
|
||||
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_ENABLED 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_DISABLED 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_FALSE 0x00000000 /* */
|
||||
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_TRUE 0x00000001 /* */
|
||||
#define NV_RUNLIST_PREEMPT 0x098 /* RW-4R */
|
||||
#define NV_RUNLIST_PREEMPT_ID 11:0 /* */
|
||||
#define NV_RUNLIST_PREEMPT_ID_HW 10:0 /* RWIUF */
|
||||
@@ -679,104 +88,4 @@
|
||||
#define NV_RUNLIST_PREEMPT_TYPE 25:24 /* RWIVF */
|
||||
#define NV_RUNLIST_PREEMPT_TYPE_RUNLIST 0x00000000 /* RWI-V */
|
||||
#define NV_RUNLIST_PREEMPT_TYPE_TSG 0x00000001 /* RW--V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0(i) (0x200+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_TSGID 11:0 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_TSGID_HW 10:0 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS 15:13 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_INVALID 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_VALID 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SAVE 0x00000005 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_LOAD 0x00000006 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SWITCH 0x00000007 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX 13:13 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_INVALID 0x00000000 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTX_VALID 0x00000001 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD 14:14 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_NOT_IN_PROGRESS 0x00000000 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_IN_PROGRESS 0x00000001 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW 15:15 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_NOT_IN_PROGRESS 0x00000000 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_IN_PROGRESS 0x00000001 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID 27:16 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID_HW 26:16 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD 29:29 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED 30:30 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE 31:31 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_IDLE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_BUSY 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1(i) (0x204+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_GFID 5:0 /* R-XVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID 13:8 /* R-XVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_INTR_ID 20:16 /* R-XVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID 30:30 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID 31:31 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL(i,j) (0x208+(i)*64+(j)*4) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_2 2 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID 11:0 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID_HW 10:0 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID 15:15 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID 27:16 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID_HW 26:16 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID 31:31 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG(i) (0x228+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN 0:0 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_DISABLED 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_ENABLED 0x00000001 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS 8:8 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI 12:12 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS 16:16 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI 20:20 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_ENGINE_ID 29:24 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST(i) (0x210+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET 1:0 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_VID_MEM 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID 11:11 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_PTR_LO 31:12 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_HI(i) (0x214+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_HI__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI 31:0 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST(i) (0x218+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET 1:0 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_VID_MEM 0x00000000 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID 11:11 /* R-IVF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_FALSE 0x00000000 /* R-I-V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_TRUE 0x00000001 /* R---V */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_PTR_LO 31:12 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI(i) (0x21C+(i)*64) /* R--4A */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI__SIZE_1 3 /* */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI 31:0 /* R-XUF */
|
||||
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
|
||||
#endif // __ga100_dev_runlist_h__
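/*
 * Usage note (editorial sketch, not part of the autogenerated header):
 * each field above is defined as HIGH:LOW, e.g.
 * "#define NV_RUNLIST_SUBMIT_LENGTH 15:0".  That notation only works
 * through the ternary trick (0 ? 15:0) == 0 and (1 ? 15:0) == 15, which
 * is what NVIDIA's DRF-style helpers rely on.  The helpers below are a
 * minimal re-derivation modeled on nvmisc.h; their exact names there
 * are an assumption, and only the NV_RUNLIST_* definitions come from
 * this header.
 */
#define SKETCH_DRF_BASE(drf)    (0 ? drf)   /* low bit index of the field  */
#define SKETCH_DRF_EXTENT(drf)  (1 ? drf)   /* high bit index of the field */
#define SKETCH_DRF_MASK(drf)    (0xFFFFFFFFU >> (31 - SKETCH_DRF_EXTENT(drf) + SKETCH_DRF_BASE(drf)))
#define SKETCH_DRF_NUM(d,r,f,n) (((n) & SKETCH_DRF_MASK(NV##d##r##f)) << SKETCH_DRF_BASE(NV##d##r##f))
#define SKETCH_DRF_VAL(d,r,f,v) (((v) >> SKETCH_DRF_BASE(NV##d##r##f)) & SKETCH_DRF_MASK(NV##d##r##f))

/* Compose the NV_RUNLIST_SUBMIT payload for a runlist of 'length' entries. */
static inline unsigned int sketch_runlist_submit(unsigned int length, unsigned int offset)
{
    return SKETCH_DRF_NUM(_RUNLIST, _SUBMIT, _LENGTH, length) |
           SKETCH_DRF_NUM(_RUNLIST, _SUBMIT, _OFFSET, offset);
}

/* Decode a NV_RUNLIST_SUBMIT_INFO read: is a runlist submit still pending? */
static inline int sketch_runlist_submit_pending(unsigned int info)
{
    return SKETCH_DRF_VAL(_RUNLIST, _SUBMIT_INFO, _PENDING, info) ==
           NV_RUNLIST_SUBMIT_INFO_PENDING_TRUE;
}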
@@ -22,7 +22,6 @@
 */

#include "nv-kthread-q.h"
#include "nv-list-helpers.h"

#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.

#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({                 \
    int __ret_warn_on = !!(condition);                \
    if (unlikely(__ret_warn_on))                      \
        printk(KERN_ERR format);                      \
    unlikely(__ret_warn_on);                          \
})
#endif
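/*
 * Editorial usage sketch (not from the original source): like the
 * kernel's own WARN, this fallback evaluates to the normalized
 * condition, so a caller can log and branch in one step.  The helper
 * below is hypothetical.
 */
static inline int sketch_check_queue(const void *q)
{
    if (WARN(q == NULL, "nv-kthread-q: NULL queue\n"))
        return 1;   /* warned and rejected */
    return 0;       /* queue looks usable  */
}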

#define NVQ_WARN(fmt, ...)                            \
    do {                                              \
        if (in_interrupt()) {                         \

@@ -78,6 +78,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c

@@ -61,6 +61,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlb
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += get_dev_pagemap_has_pgmap_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present

@@ -2354,7 +2354,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
//
// When a page is in its preferred location, a fault from another processor will
// not cause a migration if a mapping for that page from that processor can be
// established without migrating the page.
// established without migrating the page. Individual faulting pages will still
// migrate to service immediate access needs, but prefetch operations will not
// pull additional pages away from their preferred location.
//
// If the specified processor is a GPU and the GPU is not a NUMA node and the
// input range is system-allocated pageable memory and the system supports
@@ -2951,619 +2953,6 @@ NV_STATUS UvmIs8Supported(NvU32 *is8Supported);
// Tools API
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// UvmDebugGetVersion
//
// Returns the version number of the UVM debug library.
// See uvm_types.h for valid version numbers, e.g. UVM_DEBUG_V1.
//
//------------------------------------------------------------------------------
unsigned UvmDebugVersion(void);

//------------------------------------------------------------------------------
// UvmDebugCreateSession
//
// Creates a handle for a debugging session.
//
// When the client initializes, it will pass in a process handle and get a
// session ID for itself. Subsequent calls to the UVM API will take in that
// session ID.
//
// There are security requirements for this call.
// One of the following must be true:
// 1. The session owner must be running as an elevated user.
// 2. The session owner and target must belong to the same user, and the
//    session owner must be at least as privileged as the target.
//
// For CUDA 6.0 we can create at most 64 sessions per debugger process.
//
// Arguments:
//     pid: (INPUT)
//         Process id for which the debugging session will be created.
//
//     session: (OUTPUT)
//         Handle to the debugging session associated with that pid.
//
// Error codes:
//     NV_ERR_PID_NOT_FOUND:
//         pid is invalid or not associated with UVM.
//
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Function fails the security check.
//
//     NV_ERR_INSUFFICIENT_RESOURCES:
//         An attempt is made to allocate more than 64 sessions per process.
//
//     NV_ERR_BUSY_RETRY:
//         Internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugCreateSession(unsigned pid,
                                UvmDebugSession *session);
|
||||
|
||||
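// Editor's illustrative sketch, not part of this diff: attach a debug session
// to a target process and tear it down with UvmDebugDestroySession (documented
// next). target_pid is a placeholder; error handling is trimmed.
static NV_STATUS debug_attach_example(unsigned target_pid)
{
    UvmDebugSession session;
    NV_STATUS status = UvmDebugCreateSession(target_pid, &session);
    if (status != NV_OK)
        return status;

    // ... use the session with the counter and event APIs below ...

    return UvmDebugDestroySession(session);
}
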
//------------------------------------------------------------------------------
// UvmDebugDestroySession
//
// Destroys a debugging session.
//
// Arguments:
//     session: (INPUT)
//         Handle to the debugging session associated to that pid.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         session is invalid.
//
//     NV_ERR_BUSY_RETRY:
//         debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugDestroySession(UvmDebugSession session);

//------------------------------------------------------------------------------
// UvmDebugCountersEnable
//
// Enables the counters following the user specified configuration.
//
// The user must fill a list with the configuration of the counters it needs to
// either enable or disable. Each config entry can enable or disable exactly
// one counter.
//
// The structure (UvmCounterConfig) has several fields:
//     - scope: Please see the UvmCounterScope enum (above), for details.
//     - name: Name of the counter. Please check UvmCounterName for the list.
//     - gpuid: Identifies the GPU for which the counter will be enabled/disabled
//              This parameter is ignored in AllGpu scopes.
//     - state: A value of 0 will disable the counter, a value of 1 will enable
//              the counter.
//
// Note: All counters are refcounted; a counter will only be disabled when its
// refcount reaches zero.
//
// Arguments:
//     session: (INPUT)
//         Handle to the debugging session.
//
//     config: (INPUT)
//         pointer to configuration list as per above.
//
//     count: (INPUT)
//         number of entries in the config list.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Function fails the security check.
//
//     RM_INVALID_ARGUMENT:
//         debugging session is invalid or one of the counter lines is invalid.
//         If the call returns this value, no action specified by the config
//         list will have taken effect.
//
//     NV_ERR_NOT_SUPPORTED:
//         UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0
//
//     NV_ERR_BUSY_RETRY:
//         the debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
                                 UvmCounterConfig *config,
                                 unsigned count);

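// Editor's illustrative sketch, not part of this diff: enabling a single
// counter. The field names follow the list above; the scope/name enum values
// (UvmCounterScopeProcessSingleGpu, UvmCounterNameBytesXferDtH) are assumptions
// about uvm_types.h.
static NV_STATUS enable_dth_counter_example(UvmDebugSession session,
                                            NvProcessorUuid gpu)
{
    UvmCounterConfig config = {0};

    config.scope = UvmCounterScopeProcessSingleGpu; // assumed enum value
    config.name  = UvmCounterNameBytesXferDtH;      // assumed enum value
    config.gpuid = gpu;
    config.state = 1;                               // 1 == enable, 0 == disable

    return UvmDebugCountersEnable(session, &config, 1);
}
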
//------------------------------------------------------------------------------
// UvmDebugGetCounterHandle
//
// Returns a handle to a particular counter. This is an opaque handle that the
// implementation uses in order to find your counter later. This handle can be
// used in subsequent calls to UvmDebugGetCounterVal().
//
// Arguments:
//     session: (INPUT)
//         Handle to the debugging session.
//
//     scope: (INPUT)
//         Scope that will be mapped.
//
//     counterName: (INPUT)
//         Name of the counter in that scope.
//
//     gpu: (INPUT)
//         UUID of the physical GPU if the GPU is not SMC capable or SMC
//         enabled, or the GPU instance UUID of the partition of the scoped GPU.
//         This parameter is ignored in AllGpu scopes.
//
//     pCounterHandle: (OUTPUT)
//         Handle to the counter address.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         Specified scope/gpu pair or session id is invalid
//
//     NV_ERR_NOT_SUPPORTED:
//         UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0
//
//     NV_ERR_BUSY_RETRY:
//         debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugGetCounterHandle(UvmDebugSession session,
                                   UvmCounterScope scope,
                                   UvmCounterName counterName,
                                   NvProcessorUuid gpu,
                                   NvUPtr *pCounterHandle);

//------------------------------------------------------------------------------
// UvmDebugGetCounterVal
//
// Returns the counter value specified by the counter name.
//
// Arguments:
//     session: (INPUT)
//         Handle to the debugging session.
//
//     counterHandleArray: (INPUT)
//         Array of counter handles
//
//     handleCount: (INPUT)
//         Number of handles in the counterHandleArray array.
//
//     counterValArray: (OUTPUT)
//         Array of counter values corresponding to the handles.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         one of the specified handles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
                                NvUPtr *counterHandleArray,
                                unsigned handleCount,
                                unsigned long long *counterValArray);

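// Editor's illustrative sketch, not part of this diff: resolving a counter
// handle once and polling its value. The scope/name enum values are the same
// assumptions as in the previous sketch.
static NV_STATUS read_counter_example(UvmDebugSession session,
                                      NvProcessorUuid gpu,
                                      unsigned long long *value_out)
{
    NvUPtr handle;
    NV_STATUS status = UvmDebugGetCounterHandle(session,
                                                UvmCounterScopeProcessSingleGpu,
                                                UvmCounterNameBytesXferDtH,
                                                gpu,
                                                &handle);
    if (status != NV_OK)
        return status;

    // A handle can be reused across many reads; here we read it once.
    return UvmDebugGetCounterVal(session, &handle, 1, value_out);
}
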
//------------------------------------------------------------------------------
// UvmEventQueueCreate
//
// This call creates an event queue of the given size.
// No events are added to the queue until they are enabled by the user.
// Event queue data is visible to the user even after the target process dies
// if the session is active and the queue is not freed.
//
// The user doesn't need to serialize multiple UvmEventQueueCreate calls as
// each call creates a new queue state associated with the returned queue
// handle.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (OUTPUT)
//         Handle to the created queue.
//
//     queueSize: (INPUT)
//         Size of the event queue buffer in units of UvmEventEntry's.
//         This quantity must be > 1.
//
//     notificationCount: (INPUT)
//         Number of entries after which the user should be notified that
//         there are events to fetch.
//         The user is notified when queueEntries >= notificationCount.
//
//     timeStampType: (INPUT)
//         Type of time stamp to use in the event entries; see
//         UvmEventTimeStampType.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_INSUFFICIENT_RESOURCES:
//         it's not possible to allocate a queue of the requested size.
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//     NV_ERR_PID_NOT_FOUND:
//         queue create call is made on a session after the target dies.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
                              UvmEventQueueHandle *queueHandle,
                              NvS64 queueSize,
                              NvU64 notificationCount,
                              UvmEventTimeStampType timeStampType);

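// Editor's illustrative sketch, not part of this diff: a 4096-entry queue that
// notifies after 64 pending events. The timestamp enum value is an assumption
// about uvm_types.h.
static NV_STATUS create_queue_example(UvmDebugSession session,
                                      UvmEventQueueHandle *queue_out)
{
    // Wake the consumer once at least 64 entries are pending.
    return UvmEventQueueCreate(session,
                               queue_out,
                               4096,  // queueSize, in UvmEventEntry units
                               64,    // notificationCount
                               UvmEventTimeStampTypeAuto); // assumed enum value
}
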
//------------------------------------------------------------------------------
// UvmEventQueueDestroy
//
// This call frees all internal resources associated with the queue, including
// unpinning of the memory associated with that queue. Freeing the user buffer
// is the responsibility of the caller. An event queue might also be destroyed
// as a side effect of destroying the session associated with this queue.
//
// The user needs to ensure that a queue handle is not deleted while some other
// thread is using the same queue handle.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         Handle to the queue which is to be freed
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueDestroy(UvmDebugSession sessionHandle,
                               UvmEventQueueHandle queueHandle);

//------------------------------------------------------------------------------
// UvmEventEnable
//
// This call enables a particular event type in the event queue.
// All events are disabled by default when a queue is created.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         Handle to the queue where events are to be enabled
//
//     eventTypeFlags: (INPUT)
//         This field specifies the event types to be enabled. For example:
//         to enable migration events and memory violations, pass the flags
//         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_PID_NOT_FOUND:
//         this call is made after the target process dies
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventEnable(UvmDebugSession sessionHandle,
                         UvmEventQueueHandle queueHandle,
                         unsigned eventTypeFlags);

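// Editor's illustrative sketch, not part of this diff: enabling the two event
// types named in the documentation above on a freshly created queue.
static NV_STATUS enable_events_example(UvmDebugSession session,
                                       UvmEventQueueHandle queue)
{
    // Flags can be OR-ed together; everything is disabled by default.
    return UvmEventEnable(session,
                          queue,
                          UVM_EVENT_ENABLE_MEMORY_VIOLATION |
                          UVM_EVENT_ENABLE_MIGRATION);
}
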
//------------------------------------------------------------------------------
// UvmEventDisable
//
// This call disables a particular event type in the queue.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         Handle to the queue where events are to be disabled
//
//     eventTypeFlags: (INPUT)
//         This field specifies the event types to be disabled.
//         For example: to disable migration events and memory violations,
//         pass "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
//         as flags
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_PID_NOT_FOUND:
//         this call is made after the target process dies
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventDisable(UvmDebugSession sessionHandle,
                          UvmEventQueueHandle queueHandle,
                          unsigned eventTypeFlags);

//------------------------------------------------------------------------------
// UvmEventWaitOnQueueHandles
//
// The user is notified when queueEntries >= notificationCount.
// This call does a blocking wait for this notification. It returns when
// at least one of the queue handles has events to be fetched, or when it
// times out.
//
// This API accesses constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
//     queueHandles: (INPUT)
//         array of queue handles.
//
//     arraySize: (INPUT)
//         number of handles in the array.
//
//     timeout: (INPUT)
//         timeout in msec
//
//     pNotificationFlags: (OUTPUT)
//         If a particular queue handle in the input array is notified then
//         the respective bit flag is set in pNotificationFlags.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         one of the queueHandles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventWaitOnQueueHandles(UvmEventQueueHandle *queueHandleArray,
                                     unsigned arraySize,
                                     NvU64 timeout,
                                     unsigned *pNotificationFlags);

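// Editor's illustrative sketch, not part of this diff: waiting up to one
// second on a single queue and checking its notification bit. That bit 0
// corresponds to array index 0 is an assumption about the flag layout.
static NV_STATUS wait_for_events_example(UvmEventQueueHandle queue)
{
    unsigned flags = 0;
    NV_STATUS status = UvmEventWaitOnQueueHandles(&queue, 1, 1000, &flags);

    if (status == NV_OK && (flags & 0x1)) {
        // Queue 0 has at least notificationCount entries pending.
    }

    return status;
}
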
//------------------------------------------------------------------------------
// UvmEventGetNotificationHandles
//
// The user is notified when queueEntries >= notificationCount.
// The user can directly get the queue notification handles rather than using
// a UVM API to wait on queue handles. This helps the user to wait on other
// objects (apart from queue notification) along with queue notification
// handles in the same thread. The user can safely use this call along with the
// library supported wait call UvmEventWaitOnQueueHandles.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
//     queueHandles: (INPUT)
//         array of queue handles.
//
//     arraySize: (INPUT)
//         number of handles in the array.
//
//     notificationHandles: (OUTPUT)
//         Windows: Output of this call contains an array of 'windows event
//         handles' corresponding to the queue handles passed as input.
//         Linux: All queues belonging to the same process share the same
//         file descriptor (fd) for notification. If the user chooses to use
//         UvmEventGetNotificationHandles then they should check all queues
//         for new events (by calling UvmEventFetch) when notified on
//         the fd.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
                                         unsigned arraySize,
                                         void **notificationHandleArray);

//------------------------------------------------------------------------------
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding physical gpu UUID in the
// gpuUuidTable. Using indices saves on the size of each event entry. This API
// provides the gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
// queue state.
//
// Arguments:
//     gpuUuidTable: (OUTPUT)
//         The return value is an array of physical GPU UUIDs. The array index
//         is the corresponding gpuIndex. There can be at most 32 GPUs
//         associated with UVM, so the array size is 32.
//
//     validCount: (OUTPUT)
//         The system doesn't normally contain 32 GPUs. This field gives the
//         count of entries that are valid in the returned gpuUuidTable.
//
// Error codes:
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
                                  unsigned *validCount);

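// Editor's illustrative sketch, not part of this diff: resolving event
// gpuIndex values to UUIDs. The 32-entry bound comes from the documentation
// above.
static NV_STATUS dump_gpu_table_example(void)
{
    NvProcessorUuid uuids[32];
    unsigned valid_count = 0;
    NV_STATUS status = UvmEventGetGpuUuidTable(uuids, &valid_count);

    if (status == NV_OK) {
        // Entries [0, valid_count) map gpuIndex -> physical GPU UUID.
    }

    return status;
}
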
//------------------------------------------------------------------------------
// UvmEventFetch
//
// This call is used to fetch the queue entries into a user buffer.
//
// This API updates the queue state. Hence simultaneous calls to fetch/skip
// events should be avoided as that might corrupt the queue state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         queue from where to fetch the events.
//
//     pBuffer: (OUTPUT)
//         Pointer to the buffer where the API will copy the events. The user
//         shall ensure the buffer is large enough.
//
//     nEntries: (INPUT/OUTPUT)
//         As input it provides the maximum number of entries that will be
//         fetched from the queue. If this number is larger than the size of
//         the queue it will be internally capped to that value.
//         As output it returns the actual number of entries copied to the
//         buffer.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_INVALID_INDEX:
//         The indices of the queue have been corrupted.
//
//     NV_ERR_BUFFER_TOO_SMALL:
//         The event queue buffer provided by the caller was too small to
//         contain all of the events that occurred during this run.
//         Events were therefore dropped (not recorded).
//         Please re-run with a larger buffer.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
                        UvmEventQueueHandle queueHandle,
                        UvmEventEntry *pBuffer,
                        NvU64 *nEntries);

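// Editor's illustrative sketch, not part of this diff: draining up to 256
// events per call. Only the fetch itself is shown; event decoding is omitted.
static NV_STATUS drain_events_example(UvmDebugSession session,
                                      UvmEventQueueHandle queue)
{
    UvmEventEntry entries[256];
    NvU64 count = 256; // in: buffer capacity, out: entries actually copied
    NV_STATUS status = UvmEventFetch(session, queue, entries, &count);

    if (status == NV_OK) {
        // Process entries[0 .. count-1] here.
    }

    return status;
}
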
//------------------------------------------------------------------------------
// UvmEventSkipAll
//
// This API drops all event entries from the queue.
//
// This API updates the queue state. Hence simultaneous calls to fetch/
// skip events should be avoided as that might corrupt the queue state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         target queue.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventSkipAll(UvmDebugSession sessionHandle,
                          UvmEventQueueHandle queueHandle);

//------------------------------------------------------------------------------
// UvmEventQueryTimeStampType
//
// This API returns the type of time stamp used in an event entry for a given
// queue.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         target queue.
//
//     timeStampType: (OUTPUT)
//         type of time stamp used in the event entry. See
//         UvmEventTimeStampType for supported types of time stamps.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueryTimeStampType(UvmDebugSession sessionHandle,
                                     UvmEventQueueHandle queueHandle,
                                     UvmEventTimeStampType *timeStampType);

//------------------------------------------------------------------------------
// UvmDebugAccessMemory
//
// This call can be used by the debugger to read/write a memory range. The UVM
// driver may not be aware of all the pages in this range. A bit per page is
// set by the driver if it is read/written by UVM.
//
// Arguments:
//     session: (INPUT)
//         Handle to the debugging session.
//
//     baseAddress: (INPUT)
//         base address from where memory is to be accessed
//
//     sizeInBytes: (INPUT)
//         Number of bytes to be accessed
//
//     accessType: (INPUT)
//         Read or write access request
//
//     buffer: (INPUT/OUTPUT)
//         This buffer would be read or written to by the driver.
//         The user needs to allocate a buffer big enough to fit sizeInBytes.
//
//     isBitmaskSet: (INPUT/OUTPUT)
//         Set to 1 if any bit in the bitmask is set.
//         NULL (INPUT) if unused.
//
//     bitmask: (INPUT/OUTPUT)
//         One bit per page is set if UVM reads or writes to it.
//         The user should allocate a bitmask big enough to fit one bit per
//         page covered by baseAddress + sizeInBytes:
//         (baseAlignmentBytes + sizeInBytes + pageSize - 1)/pageSize number
//         of bits.
//         NULL (INPUT) if unused.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugAccessMemory(UvmDebugSession session,
                               void *baseAddress,
                               NvU64 sizeInBytes,
                               UvmDebugAccessType accessType,
                               void *buffer,
                               NvBool *isBitmaskSet,
                               NvU64 *bitmask);

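// Editor's illustrative sketch, not part of this diff: reading one page from
// the target and sizing the per-page bitmask with the formula documented
// above. The 4 KiB page size and the UvmDebugAccessTypeRead enum value are
// assumptions.
static NV_STATUS read_one_page_example(UvmDebugSession session, void *base)
{
    const NvU64 page_size = 4096;      // assumed page size
    NvU8 buffer[4096];
    NvU64 bitmask = 0;                 // (size + page_size - 1) / page_size
                                       // bits; one page needs just one bit,
                                       // so a single NvU64 word suffices.
    NvBool is_bitmask_set = 0;

    return UvmDebugAccessMemory(session,
                                base,
                                page_size,
                                UvmDebugAccessTypeRead, // assumed enum value
                                buffer,
                                &is_bitmask_set,
                                &bitmask);
}
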
//
// Uvm Tools uvm API
//


//------------------------------------------------------------------------------
// UvmToolsCreateSession
//

@@ -78,6 +78,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_serialize_clear_ops_by_type = false;

    parent_gpu->access_bits_supported = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

@@ -96,5 +100,7 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->ats.no_ats_range_required = false;

    parent_gpu->ats.gmmu_pt_depth0_init_required = false;

    parent_gpu->conf_computing.per_channel_key_rotation = false;
}
@@ -82,6 +82,8 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_serialize_clear_ops_by_type = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

@@ -94,16 +96,22 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->sparse_mappings_supported = true;

    parent_gpu->access_bits_supported = false;

    UVM_ASSERT(parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100);
    if (parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 ||
        parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000)
        parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000) {
        parent_gpu->map_remap_larger_page_promotion = true;
    else
    }
    else {
        parent_gpu->map_remap_larger_page_promotion = false;
    }

    parent_gpu->plc_supported = true;

    parent_gpu->ats.no_ats_range_required = false;

    parent_gpu->ats.gmmu_pt_depth0_init_required = false;

    parent_gpu->conf_computing.per_channel_key_rotation = false;
}
@@ -73,6 +73,7 @@ static NvU32 ce_aperture(uvm_aperture_t aperture)
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
    }
    else {
        UVM_ASSERT(uvm_aperture_is_peer(aperture));
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
               HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) |
               HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2018-2024 NVIDIA Corporation
    Copyright (c) 2018-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -111,8 +111,6 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
    uvm_spin_loop_t spin;
    NvU32 channel_faulted_mask = 0;
    NvU32 clear_type_value = 0;
    NvU32 doorbell_value = 0;
    volatile NvU32 *doorbell_ptr;

    UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);

@@ -129,12 +127,6 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
                       uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
    }

    doorbell_ptr = (NvU32 *)((NvU8 *)user_channel->runlist_pri_base_register + NV_RUNLIST_INTERNAL_DOORBELL);

    // GFID is not required since we clear faulted channel with a SW method on
    // SRIOV. On baremetal, GFID is always zero.
    doorbell_value = HWVALUE(_RUNLIST, INTERNAL_DOORBELL, CHID, user_channel->hw_channel_id);

    // Wait for the channel to have the FAULTED bit set as this can race with
    // interrupt notification
    UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);

@@ -143,7 +135,7 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user

    wmb();

    UVM_GPU_WRITE_ONCE(*doorbell_ptr, doorbell_value);
    UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
}

static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)
@@ -601,7 +601,12 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
    uvm_page_mask_zero(faults_serviced_mask);
    uvm_page_mask_zero(reads_serviced_mask);

    if (!(vma->vm_flags & VM_READ))
    // If the VMA doesn't have read or write permissions then all faults are
    // fatal so we exit early.
    // TODO: Bug 5451843: This fix brings to light potential issues in the ATS
    // fault handling path as described in the bug. Those need to be handled
    // to avoid any potential permission issues.
    if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
        return NV_OK;

    if (!(vma->vm_flags & VM_WRITE)) {
@@ -26,6 +26,7 @@
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_blackwell_fault_buffer.h"
#include "ctrl2080mc.h"

void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
@@ -81,6 +82,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_serialize_clear_ops_by_type = parent_gpu->rm_info.accessCntrBufferCount == 2;

    // TODO: Bug 5262806: Remove this WAR once the bug is fixed.
    // Before this override, accessCntrBufferCount has only been used to
    // determine the support for access counters in uvm_gpu.c and the statement
    // above. After the HAL init, it is used for buffer allocations, and must
    // not change its value.
    if (parent_gpu->rm_info.accessCntrBufferCount > 1)
        parent_gpu->rm_info.accessCntrBufferCount = 1;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;

@@ -99,6 +110,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->ats.no_ats_range_required = true;

    parent_gpu->ats.gmmu_pt_depth0_init_required = parent_gpu->ats.non_pasid_ats_enabled;

    parent_gpu->access_bits_supported = false;

    // Blackwell has a physical translation prefetcher, meaning SW must assume
    // that any physical ATS translation can be fetched at any time. The
    // specific behavior and impact differs with non-PASID ATS support, but

@@ -142,16 +157,26 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    // TODO: Bug 5023085: this should be queried from RM instead of determined
    // by UVM.
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
        parent_gpu->rm_info.gpuImplementation ==
            NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B) {
        parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B) {
        parent_gpu->is_integrated_gpu = true;
        parent_gpu->access_bits_supported = false;
        // GB10B has sticky L2 coherent cache lines.
        // For details, refer to the comments in uvm_gpu.h
        // where this field is declared.
        parent_gpu->sticky_l2_coherent_cache_lines = true;
    }
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
        parent_gpu->rm_info.gpuImplementation ==
            NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
        parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B) {
        parent_gpu->is_integrated_gpu = true;
        parent_gpu->access_bits_supported = false;
    }
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
        (parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB206 ||
         parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB207)) {
        // TODO: Bug 3186788 : As reported in Bug 5309034, GB206
        // and GB207 experience a GSP crash with VAB. Depending
        // on whether RM fixes it or marks it as cannot fix, the
        // below checks can be removed or retained.
        parent_gpu->access_bits_supported = false;
    }
}

kernel-open/nvidia-uvm/uvm_blackwell_ce.c (new file, 77 lines)
@@ -0,0 +1,77 @@
/*******************************************************************************
    Copyright (c) 2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_push.h"
#include "uvm_mem.h"
#include "uvm_conf_computing.h"

bool uvm_hal_blackwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (uvm_gpu_address_is_peer(gpu, src)) {
        UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
        return false;
    }

    if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
        UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
                      src.address,
                      dst.address);
        return false;
    }

    if (g_uvm_global.conf_computing_enabled) {
        // Blackwell+ GPUs support secure P2P. In that case, memcopy using
        // physical addresses is valid.
        if (!uvm_aperture_is_peer(dst.aperture)) {
            // In Confidential Computing, if a non-p2p memcopy uses physical
            // addressing for either the destination or the source, then the
            // corresponding aperture must be vidmem. If virtual addressing
            // is used, and the backing storage is sysmem the access is only
            // legal if the copy type is NONPROT2NONPROT, but the validation
            // does not detect it.
            if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
                return false;

            if (!dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
                return false;
        }

        // The source and destination must be both unprotected, for sysmem
        // copy, or both protected for p2p copy.
        if (dst.is_unprotected != src.is_unprotected)
            return false;
    }

    if (!gpu->parent->ce_phys_vidmem_write_supported && !dst.is_virtual && dst.aperture == UVM_APERTURE_VID) {
        UVM_ERR_PRINT("Destination address of vidmem memcopy must be virtual, not physical: {%s, 0x%llx}\n",
                      uvm_gpu_address_aperture_string(dst),
                      dst.address);
        return false;
    }

    return true;
}
@@ -352,9 +352,8 @@ void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t apert
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU32 aperture_value;

    if (!gpu->parent->is_integrated_gpu) {
    if (!gpu->parent->is_integrated_gpu)
        return uvm_hal_ampere_host_l2_invalidate(push, aperture);
    }

    switch (aperture) {
        case UVM_APERTURE_SYS:
@@ -369,9 +368,9 @@ void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t apert
            uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
            // Flush dirty
            NV_PUSH_4U(C96F, MEM_OP_A, 0,
                       MEM_OP_B, 0,
                       MEM_OP_C, 0,
                       MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
                             MEM_OP_B, 0,
                             MEM_OP_C, 0,
                             MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
            // Invalidate
            NV_PUSH_4U(C96F, MEM_OP_A, 0,
                       MEM_OP_B, 0,
@@ -3140,13 +3140,25 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
                                                    UVM_CHANNEL_TYPE_MEMOPS,
                                                    UVM_CHANNEL_TYPE_WLC };

    static const uvm_channel_type_t types_p2p[] = { UVM_CHANNEL_TYPE_CPU_TO_GPU,
                                                    UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                                    UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                                    UVM_CHANNEL_TYPE_GPU_TO_GPU,
                                                    UVM_CHANNEL_TYPE_MEMOPS,
                                                    UVM_CHANNEL_TYPE_WLC };

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
    if (gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED) {
        pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);

    // Direct transfers between GPUs are disallowed in Confidential Computing,
    // but the preferred CE is still set to an arbitrary value for consistency.
    preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
        // If direct transfers between GPUs are disallowed, the preferred
        // CE is still set to an arbitrary value for consistency.
        preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
    }
    else {
        pick_ces_for_channel_types(manager, ce_caps, types_p2p, ARRAY_SIZE(types_p2p), preferred_ce);
    }

    best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];

@@ -37,6 +37,16 @@
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU     1024
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64

// It is unsafe to destroy the GPU's channel manager of an active uvm_gpu_t
// object. We sync trackers to avoid having any of the GPU's channels in any
// trackers. We can only guarantee that because in these tests, we only allow
// a single reference to the GPU.
static void channel_manager_destroy(uvm_gpu_t *gpu)
{
    uvm_parent_gpu_sync_trackers(gpu->parent);
    uvm_channel_manager_destroy(gpu->channel_manager);
}

// Schedule pushes one after another on all GPUs and channel types that copy and
// increment a counter into an adjacent memory location in a buffer. And then
// verify that all the values are correct on the CPU.

@@ -177,7 +187,7 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
    uvm_channel_update_progress_all(channel);
    TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);

    uvm_channel_manager_destroy(gpu->channel_manager);
    channel_manager_destroy(gpu);

    // Destruction will hit the error again, so clear one more time.
    uvm_global_reset_fatal_error();

@@ -306,7 +316,7 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
    test_status = uvm_test_rc_for_gpu(gpu);
    g_uvm_global.disable_fatal_error_assert = false;

    uvm_channel_manager_destroy(gpu->channel_manager);
    channel_manager_destroy(gpu);
    create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);

    TEST_NV_CHECK_RET(test_status);

@@ -355,7 +365,10 @@ static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
    cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
    sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset to IOMMU mapped sysmem");
    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_GPU_TO_CPU,
                            &push,
                            "Test memset to IOMMU mapped sysmem");
    TEST_NV_CHECK_GOTO(status, done);

    gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);

@@ -497,14 +510,15 @@ static NV_STATUS test_iommu(uvm_va_space_t *va_space)
    NV_STATUS test_status, create_status;

    // The GPU channel manager is destroyed and then re-created after
    // testing ATS RC fault, so this test requires exclusive access to the GPU.
    // testing ATS RC fault, so this test requires exclusive access to the
    // GPU.
    TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

    g_uvm_global.disable_fatal_error_assert = true;
    test_status = uvm_test_iommu_rc_for_gpu(gpu);
    g_uvm_global.disable_fatal_error_assert = false;

    uvm_channel_manager_destroy(gpu->channel_manager);
    channel_manager_destroy(gpu);
    create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);

    TEST_NV_CHECK_RET(test_status);

@@ -989,7 +1003,9 @@ static NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
    uvm_conf_computing_dma_buffer_t *cipher_text;
    void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
    uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;
    uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) : channel;
    uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ?
                                      uvm_channel_lcic_get_paired_wlc(channel) :
                                      channel;

    plain_cpu_va = &status;
    data_size = sizeof(status);

@@ -1037,8 +1053,8 @@ release:
    if (status != NV_OK)
        return status;

    // All channels except SEC2 used at least a single IV to release tracking.
    // SEC2 doesn't support decrypt direction.
    // All channels except SEC2 used at least a single IV to release
    // tracking. SEC2 doesn't support decrypt direction.
    if (uvm_channel_is_sec2(channel))
        TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
    else

@@ -1557,7 +1573,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
    TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

    gpu->uvm_test_force_upper_pushbuffer_segment = 1;
    uvm_channel_manager_destroy(gpu->channel_manager);
    channel_manager_destroy(gpu);
    TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
    gpu->uvm_test_force_upper_pushbuffer_segment = 0;

@@ -157,7 +157,7 @@ void on_uvm_assert(void);
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...)   \
    do {                                        \
        UVM_IGNORE_EXPR(expr);                  \
        UVM_NO_PRINT(fmt, ##__VA_ARGS__);       \
        no_printk(fmt, ##__VA_ARGS__);          \
    } while (0)

// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2019 NVIDIA Corporation
    Copyright (c) 2019-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -27,8 +27,6 @@
#include "uvm_linux.h"
#include "uvm_forward_decl.h"

extern int uvm_enable_debug_procfs;

extern unsigned uvm_perf_map_remote_on_native_atomics_fault;

extern uvm_global_t g_uvm_global;

@@ -98,6 +98,4 @@ typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_b
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;

typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
#endif //__UVM_FORWARD_DECL_H__
@@ -146,12 +146,20 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,

    // Add the physical offset for peer mappings
    if (uvm_aperture_is_peer(aperture)) {
        if (uvm_parent_gpus_are_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
        if (uvm_parent_gpus_are_nvlink_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
            phys_offset += memory_owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
        else if (uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
            phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
    }

    // Add DMA offset for bar1 p2p.
    if (uvm_aperture_is_sys(aperture) && !memory_info->sysmem) {
        uvm_gpu_phys_address_t phys_address = uvm_gpu_peer_phys_address(memory_owning_gpu, memory_info->physAddr, memory_mapping_gpu);

        UVM_ASSERT(uvm_aperture_is_sys(phys_address.aperture));
        phys_offset += (phys_address.address - memory_info->physAddr);
    }

    for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {

        pte = hal->make_pte(aperture,
@@ -159,6 +167,16 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
                            prot,
                            pte_flags);

        if (pte != ext_mapping_info->pteBuffer[index * skip]) {
            UVM_ERR_PRINT("PTE mismatch for %s->%s at %d (aperture: %s) %llx vs. %llx (address: %llx)\n",
                          uvm_parent_gpu_name(memory_mapping_gpu->parent),
                          uvm_parent_gpu_name(memory_owning_gpu->parent),
                          index,
                          uvm_aperture_string(aperture),
                          pte,
                          ext_mapping_info->pteBuffer[index * skip],
                          memory_info->physAddr);
        }
        TEST_CHECK_RET(pte == ext_mapping_info->pteBuffer[index * skip]);

        phys_offset += page_size;
@@ -42,24 +42,21 @@
#include "nv_uvm_interface.h"

uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static struct UvmEventsLinux g_exported_uvm_events;
static bool g_ops_registered = false;

static NV_STATUS uvm_register_callbacks(void)
{
    NV_STATUS status = NV_OK;

    g_exported_uvm_ops.suspend = uvm_suspend_entry;
    g_exported_uvm_ops.resume = uvm_resume_entry;
    g_exported_uvm_ops.startDevice = NULL;
    g_exported_uvm_ops.stopDevice = NULL;
    g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;

    g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
    g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
    g_exported_uvm_events.isrTopHalf = uvm_isr_top_half_entry;
    g_exported_uvm_events.suspend = uvm_suspend_entry;
    g_exported_uvm_events.resume = uvm_resume_entry;
    g_exported_uvm_events.drainP2P = uvm_suspend_and_drainP2P_entry;
    g_exported_uvm_events.resumeP2P = uvm_resumeP2P_entry;

    // Register the UVM callbacks with the main GPU driver:
    status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
    status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmEvents(&g_exported_uvm_events));
    if (status != NV_OK)
        return status;

@@ -71,7 +68,7 @@ static NV_STATUS uvm_register_callbacks(void)
static void uvm_unregister_callbacks(void)
{
    if (g_ops_registered) {
        uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
        uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmEvents());
        g_ops_registered = false;
    }
}
@@ -408,14 +408,14 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
    uvm_gpu_t *gpu = NULL;
    uvm_gpu_id_t gpu_id;
    NvU32 sub_processor_index;
    NvU32 cur_sub_processor_index;
    NvU32 start_search_index;

    UVM_ASSERT(parent_gpu);

    gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
    cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;
    start_search_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) + 1 : 0;

    sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
    sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, start_search_index);
    if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
        gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
        UVM_ASSERT(gpu != NULL);
@@ -44,6 +44,7 @@
#include "uvm_conf_computing.h"
#include "uvm_linux.h"
#include "uvm_mmu.h"
#include "uvm_kvmalloc.h"

#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"

@@ -67,6 +68,8 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
    switch (link_type) {
        case UVM_LINK_TYPE_PCIE:
            return UVM_GPU_LINK_PCIE;
        case UVM_LINK_TYPE_PCIE_BAR1:
            return UVM_GPU_LINK_PCIE_BAR1;
        case UVM_LINK_TYPE_NVLINK_1:
            return UVM_GPU_LINK_NVLINK_1;
        case UVM_LINK_TYPE_NVLINK_2:

@@ -107,18 +110,18 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
    }

    parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
    parent_gpu->peer_address_info.is_direct_connected = gpu_info->nvlDirectConnect;
    parent_gpu->peer_address_info.is_nvlink_direct_connected = gpu_info->nvlDirectConnect;

    // nvswitch is routed via physical pages, where the upper 13-bits of the
    // 47-bit address space holds the routing information for each peer.
    // Currently, this is limited to a 16GB framebuffer window size.
    if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
    if (parent_gpu->peer_address_info.is_nvlink_direct_connected) {
        parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
    }
    else if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
        // nvswitch is routed via physical pages, where the upper 13-bits of the
        // 47-bit address space holds the routing information for each peer.
        // Currently, this is limited to a 16GB framebuffer window size.
        parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
        parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
    }
    else if (parent_gpu->peer_address_info.is_direct_connected) {
        parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
    }

    parent_gpu->ats.non_pasid_ats_enabled = gpu_info->nonPasidAtsSupport;
@@ -533,11 +536,12 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{
    BUILD_BUG_ON(UVM_GPU_LINK_MAX != 8);
    BUILD_BUG_ON(UVM_GPU_LINK_MAX != 9);

    switch (link_type) {
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_PCIE);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_PCIE_BAR1);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);

@@ -666,14 +670,14 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                             gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
    }
    UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
                         gpu->parent->access_counter_buffer[i].max_notifications);
                         gpu->parent->access_counters.buffer[i].max_notifications);
    UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
                         gpu->parent->access_counter_buffer[i].cached_get);
                         gpu->parent->access_counters.buffer[i].cached_get);
    UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
                         gpu->parent->access_counter_buffer[i].cached_put);
                         gpu->parent->access_counters.buffer[i].cached_put);

    get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
    put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
    get = UVM_GPU_READ_ONCE(*gpu->parent->access_counters.buffer[i].rm_info.pAccessCntrBufferGet);
    put = UVM_GPU_READ_ONCE(*gpu->parent->access_counters.buffer[i].rm_info.pAccessCntrBufferPut);

    UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
    UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
@@ -766,10 +770,10 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
    UVM_ASSERT(uvm_procfs_is_debug_enabled());

    // procfs_files are created before gpu_init_isr, we need to check if the
    // access_counter_buffer is allocated.
    if (parent_gpu->access_counter_buffer) {
    // access_counters.buffer is allocated.
    if (parent_gpu->access_counters.buffer) {
        for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
            uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
            uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counters.buffer[i];

            num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
            num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);

@@ -885,6 +889,19 @@ static uvm_aperture_t parent_gpu_peer_aperture(uvm_parent_gpu_t *local,
    else
        peer_index = 1;

    if (parent_peer_caps->link_type == UVM_GPU_LINK_PCIE_BAR1) {
        // UVM_APERTURE_SYS can be used if either the local (accessing) GPU
        // _DOES NOT_ use PCIE atomics, or the remote (owning) GPU _DOES_
        // accept PCIE atomics. Moreover, the bus topology needs to support
        // routing of PCIe atomics between the devices.
        //
        // If either of the above conditions is not met we need to use
        // UVM_APERTURE_SYS_NON_COHERENT to prevent use of PCIe atomics.
        // RM provides the consolidated information in P2P properties.
        const bool enable_atomics = parent_peer_caps->bar1_p2p_pcie_atomics_enabled[peer_index];
        return enable_atomics ? UVM_APERTURE_SYS : UVM_APERTURE_SYS_NON_COHERENT;
    }

    return UVM_APERTURE_PEER(parent_peer_caps->peer_ids[peer_index]);
}
@@ -1164,6 +1181,22 @@ static void deinit_semaphore_pools(uvm_gpu_t *gpu)
        uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
}

static void init_access_counters_serialize_clear_tracker(uvm_parent_gpu_t *parent)
{
    NvU32 i;

    for (i = 0; i < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; i++)
        uvm_tracker_init(&parent->access_counters.serialize_clear_tracker[i]);
}

static void deinit_access_counters_serialize_clear_tracker(uvm_parent_gpu_t *parent)
{
    NvU32 i;

    for (i = 0; i < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; i++)
        uvm_tracker_deinit(&parent->access_counters.serialize_clear_tracker[i]);
}

static NV_STATUS find_unused_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t *out_id)
{
    NvU32 i;

@@ -1209,9 +1242,11 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
    uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
    uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
    uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
    uvm_tracker_init(&parent_gpu->access_counters_clear_tracker);
    uvm_mutex_init(&parent_gpu->access_counters.enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
    uvm_mutex_init(&parent_gpu->access_counters.clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
    uvm_mutex_init(&parent_gpu->access_counters.serialize_clear_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
    uvm_tracker_init(&parent_gpu->access_counters.clear_tracker);
    init_access_counters_serialize_clear_tracker(parent_gpu);
    uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
    uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
    uvm_rb_tree_init(&parent_gpu->instance_ptr_table);

@@ -1229,7 +1264,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
    return NV_OK;

cleanup:
    uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
    uvm_tracker_deinit(&parent_gpu->access_counters.clear_tracker);
    deinit_access_counters_serialize_clear_tracker(parent_gpu);
    uvm_kvfree(parent_gpu);

    return status;
@@ -1686,24 +1722,41 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}

// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
if (parent_gpu->access_counters_supported && parent_gpu->access_counters.buffer) {
uvm_mutex_lock(&parent_gpu->access_counters.clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters.clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters.clear_tracker_lock);

if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());

if (parent_gpu->access_counters_serialize_clear_ops_by_type) {
uvm_access_counter_clear_op_t op;
uvm_mutex_lock(&parent_gpu->access_counters.serialize_clear_lock);
for (op = 0; op < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; op++) {
status = uvm_tracker_wait(&parent_gpu->access_counters.serialize_clear_tracker[op]);

if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
uvm_mutex_unlock(&parent_gpu->access_counters.serialize_clear_lock);
}
}
}

void uvm_parent_gpu_sync_trackers(uvm_parent_gpu_t *parent_gpu)
{
sync_parent_gpu_trackers(parent_gpu,
parent_gpu->isr.replayable_faults.handling,
parent_gpu->isr.non_replayable_faults.handling);
}

// Remove all references the given GPU has to other GPUs, since one of those
// other GPUs is getting removed. This involves waiting for any unfinished
// trackers contained by this GPU.
static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
{
sync_parent_gpu_trackers(gpu->parent,
gpu->parent->isr.replayable_faults.handling,
gpu->parent->isr.non_replayable_faults.handling);
uvm_parent_gpu_sync_trackers(gpu->parent);

// Sync all trackers in PMM
uvm_pmm_gpu_sync(&gpu->pmm);
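A self-contained model (pthreads instead of UVM locks; trackers reduced to a stub) of the teardown ordering the hunk above introduces: first wait the aggregate clear tracker under its own lock, then, only when the HW serializes clear types, wait each per-type tracker under the serialize lock.

#include <pthread.h>

enum { OP_TARGETED, OP_ALL, OP_COUNT };

typedef struct { int pending; } tracker_t;

static void tracker_wait(tracker_t *t) { t->pending = 0; /* wait for GPU work */ }

static void sync_clear_trackers(tracker_t *aggregate,
                                pthread_mutex_t *aggregate_lock,
                                tracker_t per_op[OP_COUNT],
                                pthread_mutex_t *serialize_lock,
                                int serialize_by_type)
{
    pthread_mutex_lock(aggregate_lock);
    tracker_wait(aggregate);
    pthread_mutex_unlock(aggregate_lock);

    if (serialize_by_type) {
        pthread_mutex_lock(serialize_lock);
        for (int op = 0; op < OP_COUNT; op++)
            tracker_wait(&per_op[op]);
        pthread_mutex_unlock(serialize_lock);
    }
}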
@@ -1713,7 +1766,7 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
}

// Remove all references to the given GPU from its parent, since it is being
// removed. This involves waiting for any unfinished trackers contained
// by the parent GPU.
static void remove_gpu_from_parent_gpu(uvm_gpu_t *gpu)
{
@@ -1823,7 +1876,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);

uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_tracker_deinit(&parent_gpu->access_counters.clear_tracker);
deinit_access_counters_serialize_clear_tracker(parent_gpu);

uvm_kvfree(parent_gpu);
}
@@ -1960,7 +2014,7 @@ static void update_stats_migration_cb(uvm_va_space_t *va_space,
}
else if (is_access_counter) {
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
atomic64_add(pages, &gpu_dst->parent->access_counters.buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
@@ -1973,7 +2027,7 @@ static void update_stats_migration_cb(uvm_va_space_t *va_space,
}
else if (is_access_counter) {
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
atomic64_add(pages, &gpu_src->parent->access_counters.buffer[index].stats.num_pages_out);
}
}
}
@@ -2114,11 +2168,19 @@ bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0,
return false;
}

bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
bool uvm_parent_gpus_are_bar1_peers(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
{
if (parent_gpu0 != parent_gpu1)
return parent_gpu_peer_caps(parent_gpu0, parent_gpu1)->link_type == UVM_GPU_LINK_PCIE_BAR1;

return false;
}

bool uvm_parent_gpus_are_nvlink_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
{
if (parent_gpu0 != parent_gpu1 &&
parent_gpu0->peer_address_info.is_direct_connected &&
parent_gpu1->peer_address_info.is_direct_connected)
parent_gpu0->peer_address_info.is_nvlink_direct_connected &&
parent_gpu1->peer_address_info.is_nvlink_direct_connected)
return true;

return false;
@@ -2419,6 +2481,17 @@ static NV_STATUS parent_peers_init(uvm_parent_gpu_t *parent_gpu0,
parent_peer_caps->optimalNvlinkWriteCEs[0] = p2p_caps_params.optimalNvlinkWriteCEs[0];
parent_peer_caps->optimalNvlinkWriteCEs[1] = p2p_caps_params.optimalNvlinkWriteCEs[1];

// Set IOMMU/DMA mappings for bar1 p2p
parent_peer_caps->bar1_p2p_dma_base_address[0] = p2p_caps_params.bar1DmaAddress[0];
parent_peer_caps->bar1_p2p_dma_base_address[1] = p2p_caps_params.bar1DmaAddress[1];
parent_peer_caps->bar1_p2p_dma_size[0] = p2p_caps_params.bar1DmaSize[0];
parent_peer_caps->bar1_p2p_dma_size[1] = p2p_caps_params.bar1DmaSize[1];
parent_peer_caps->bar1_p2p_pcie_atomics_enabled[0] = p2p_caps_params.bar1PcieAtomics[0];
parent_peer_caps->bar1_p2p_pcie_atomics_enabled[1] = p2p_caps_params.bar1PcieAtomics[1];

if (parent_peer_caps->bar1_p2p_dma_size[0] || parent_peer_caps->bar1_p2p_dma_size[1])
UVM_ASSERT(link_type == UVM_GPU_LINK_PCIE_BAR1);

return NV_OK;

cleanup:
@@ -2563,7 +2636,7 @@ static void peers_release(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
peers_destroy(gpu0, gpu1, peer_caps);
}

static void parent_peers_destroy_nvlink(uvm_parent_gpu_t *parent_gpu)
static void parent_peers_destroy_static_link(uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_t *other_parent_gpu;

@@ -2585,7 +2658,7 @@ static void parent_peers_destroy_nvlink(uvm_parent_gpu_t *parent_gpu)
}
}

static NV_STATUS parent_peers_discover_nvlink(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS parent_peers_discover_static_link(uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
NV_STATUS status;
@@ -2617,12 +2690,12 @@ static NV_STATUS parent_peers_discover_nvlink(uvm_parent_gpu_t *parent_gpu)
return NV_OK;

cleanup:
parent_peers_destroy_nvlink(parent_gpu);
parent_peers_destroy_static_link(parent_gpu);

return status;
}

static void peers_destroy_nvlink(uvm_gpu_t *gpu)
static void peers_destroy_static_link(uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
uvm_parent_gpu_t *parent_gpu;
@@ -2656,7 +2729,7 @@ static void peers_destroy_nvlink(uvm_gpu_t *gpu)
}
}

static NV_STATUS peers_discover_nvlink(uvm_gpu_t *gpu)
static NV_STATUS peers_discover_static_link(uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *parent_gpu = gpu->parent;
uvm_parent_gpu_t *other_parent_gpu;
@@ -2688,11 +2761,26 @@ static NV_STATUS peers_discover_nvlink(uvm_gpu_t *gpu)
return NV_OK;

cleanup:
peers_destroy_nvlink(gpu);
peers_destroy_static_link(gpu);

return status;
}

static NV_STATUS uvm_gpu_init_access_bits(uvm_parent_gpu_t *parent_gpu)
{
return uvm_rm_locked_call(nvUvmInterfaceAccessBitsBufAlloc(parent_gpu->rm_device, &parent_gpu->vab_info));
}

static NV_STATUS uvm_gpu_update_access_bits(uvm_parent_gpu_t *parent_gpu, UVM_ACCESS_BITS_DUMP_MODE mode)
{
return nvUvmInterfaceAccessBitsDump(parent_gpu->rm_device, &parent_gpu->vab_info, mode);
}

static NV_STATUS uvm_gpu_deinit_access_bits(uvm_parent_gpu_t *parent_gpu)
{
return uvm_rm_locked_call(nvUvmInterfaceAccessBitsBufFree(parent_gpu->rm_device, &parent_gpu->vab_info));
}

// Remove a gpu and unregister it from RM
// Note that this is also used in most error paths in add_gpu()
static void remove_gpu(uvm_gpu_t *gpu)
@@ -2700,6 +2788,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
NvU32 sub_processor_index;
uvm_parent_gpu_t *parent_gpu;
bool free_parent;
NV_STATUS status;

uvm_assert_mutex_locked(&g_uvm_global.global_lock);

@@ -2716,12 +2805,17 @@ static void remove_gpu(uvm_gpu_t *gpu)

free_parent = (parent_gpu->num_retained_gpus == 0);

if (free_parent && parent_gpu->access_bits_supported) {
status = uvm_gpu_deinit_access_bits(parent_gpu);
UVM_ASSERT(status == NV_OK);
}

// NVLINK peers must be removed and the relevant access counter buffers must
// be flushed before removing this GPU from the global table.
peers_destroy_nvlink(gpu);
peers_destroy_static_link(gpu);

if (free_parent)
parent_peers_destroy_nvlink(parent_gpu);
parent_peers_destroy_static_link(parent_gpu);

// uvm_mem_free and other uvm_mem APIs invoked by the Confidential Compute
// deinitialization must be called before the GPU is removed from the global
@@ -2865,21 +2959,27 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);

if (alloc_parent) {
status = parent_peers_discover_nvlink(parent_gpu);
status = parent_peers_discover_static_link(parent_gpu);
if (status != NV_OK)
goto error_retained;
}

status = peers_discover_nvlink(gpu);
status = peers_discover_static_link(gpu);
if (status != NV_OK)
goto error_retained;

*gpu_out = gpu;

if (alloc_parent && parent_gpu->access_bits_supported) {
status = uvm_gpu_init_access_bits(parent_gpu);
if (status != NV_OK)
goto error_retained;
}

return NV_OK;

error_retained:
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK/BAR1 peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));

// Nobody can have retained the GPU yet, since we still hold the
// global lock.
@@ -2933,10 +3033,6 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;

// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
if (gpu_info->accessCntrBufferCount > 1)
gpu_info->accessCntrBufferCount = 1;

if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
@@ -3082,10 +3178,25 @@ uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 ad
{
uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, owning_gpu);

if (uvm_parent_gpus_are_direct_connected(accessing_gpu->parent, owning_gpu->parent))
if (uvm_parent_gpus_are_nvlink_direct_connected(accessing_gpu->parent, owning_gpu->parent)) {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
address += owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent))
}
else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent)) {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
address += owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
else if (uvm_aperture_is_sys(aperture)) {
// BAR1 P2P can use either coherent or non-coherent sysmem,
// depending on atomic capabilities of the peer devices.
uvm_parent_gpu_peer_t *parent_peer_caps = parent_gpu_peer_caps(accessing_gpu->parent, owning_gpu->parent);
int peer_index = (uvm_id_cmp(accessing_gpu->id, owning_gpu->id) < 0) ? 0 : 1;

UVM_ASSERT(parent_peer_caps->link_type == UVM_GPU_LINK_PCIE_BAR1);
UVM_ASSERT(parent_peer_caps->bar1_p2p_dma_size[peer_index] != 0);

address += parent_peer_caps->bar1_p2p_dma_base_address[peer_index];
}

return uvm_gpu_phys_address(aperture, address);
}

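For illustration only: the arithmetic the function above performs for each connection kind, with invented window bases. The window-base names mirror the fields referenced in uvm_gpu_peer_phys_address().

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t address = 0x1000; // offset into the owning GPU's vidmem

    // NVLink direct-connected: offset into the owning GPU's GPA memory window.
    uint64_t peer_gpa_window_start = 0x100000000000ULL;
    printf("nvlink direct: 0x%llx\n",
           (unsigned long long)(peer_gpa_window_start + address));

    // NVSwitch: offset into the owning GPU's fabric memory window.
    uint64_t fabric_window_start = 0x200000000000ULL;
    printf("nvswitch:      0x%llx\n",
           (unsigned long long)(fabric_window_start + address));

    // BAR1 P2P: offset into the RM-provided DMA window for this direction;
    // the SYS vs. SYS_NON_COHERENT aperture was already chosen earlier by
    // uvm_gpu_peer_aperture() based on PCIe atomics support.
    uint64_t bar1_dma_base = 0x80000000ULL;
    printf("bar1 p2p:      0x%llx\n",
           (unsigned long long)(bar1_dma_base + address));
    return 0;
}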
@@ -3134,6 +3245,69 @@ NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
return gpu_peer_caps(gpu0, gpu1)->ref_count;
}

static bool gpu_address_is_coherent_peer(uvm_gpu_t *gpu, uvm_gpu_phys_address_t address)
{
bool is_peer = false;
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;

if (address.aperture != UVM_APERTURE_SYS)
return false;

// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);

// Exposed coherent vidmem can be accessed via sys aperture even without
// GPUs being explicit peers, so each parent GPU is a potential peer.
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {

if (parent_gpu == gpu->parent)
continue;

if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
break;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);

return is_peer;
}

static bool gpu_phys_address_is_bar1p2p_peer(uvm_gpu_t *gpu, uvm_gpu_phys_address_t address)
{
bool is_peer = false;
uvm_parent_processor_mask_t peer_parent_gpus;
uvm_parent_gpu_t *peer_parent_gpu;

// BAR1 P2P is accessed via sys aperture
if (!uvm_aperture_is_sys(address.aperture))
return false;

uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&peer_parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(peer_parent_gpu, &peer_parent_gpus) {
const uvm_parent_gpu_peer_t *peer_caps = parent_gpu_peer_caps(gpu->parent, peer_parent_gpu);
const int peer_index = (uvm_parent_id_cmp(gpu->parent->id, peer_parent_gpu->id) < 0) ? 0 : 1;

UVM_ASSERT(peer_caps->ref_count > 0);
if (peer_caps->link_type != UVM_GPU_LINK_PCIE_BAR1)
continue;

if (address.address >= peer_caps->bar1_p2p_dma_base_address[peer_index] &&
address.address < (peer_caps->bar1_p2p_dma_base_address[peer_index] + peer_caps->bar1_p2p_dma_size[peer_index])) {
is_peer = true;
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

return is_peer;
}

bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
{
if (address.is_virtual) {
@@ -3145,21 +3319,18 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
}
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;

if (uvm_aperture_is_peer(address.aperture)) {
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
uvm_parent_processor_mask_t peer_parent_gpus;
uvm_parent_gpu_t *peer_parent_gpu;

// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;

uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
if (!parent_peer_gpu->egm.enabled)
uvm_parent_gpus_from_processor_mask(&peer_parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(peer_parent_gpu, &peer_parent_gpus) {
if (!peer_parent_gpu->egm.enabled)
continue;

// EGM uses peer IDs but they are different from VIDMEM peer
@@ -3171,32 +3342,18 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, peer_parent_gpu));
}

uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;

// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);

// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;

if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
else if (uvm_aperture_is_sys(address.aperture)) {
// SYS aperture is used for coherent peers or BAR1 P2P.
// SYS_NON_COHERENT aperture is used for BAR1 P2P.
uvm_gpu_phys_address_t phys_addr = uvm_gpu_phys_address(address.aperture, address.address);
return gpu_address_is_coherent_peer(gpu, phys_addr) || gpu_phys_address_is_bar1p2p_peer(gpu, phys_addr);
}

UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
@@ -3927,3 +4084,50 @@ NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct fil

return status;
}

NV_STATUS uvm_test_dump_access_bits(UVM_TEST_DUMP_ACCESS_BITS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu = NULL;
NV_STATUS status = NV_OK;
NvU64 granularity_size_kb = 0;

gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !gpu->parent->access_bits_supported) {
status = NV_ERR_INVALID_DEVICE;
goto done;
}

if (!gpu->parent->vab_info.accessBitsBufferHandle) {
status = NV_ERR_INVALID_STATE;
goto done;
}

// See resman/interface/rmapi/finn/ctrl/ctrlc763.finn for 'granularity' enum values
granularity_size_kb = (NvU64)(64) << gpu->parent->vab_info.granularity;
params->granularity_size_kb = granularity_size_kb;

status = uvm_gpu_update_access_bits(gpu->parent, params->mode);
if (status != NV_OK)
goto done;

// If this is a length query, we are done after we set the length
if (params->current_bits_length == 0) {
params->current_bits_length = ARRAY_SIZE(gpu->parent->vab_info.currentBits);
goto done;
}

// Copy the bits to user space
if (copy_to_user(params->current_bits,
gpu->parent->vab_info.currentBits,
sizeof(NvU64) * params->current_bits_length)) {
status = NV_ERR_INVALID_ADDRESS;
goto done;
}

done:
if (gpu)
uvm_gpu_release(gpu);
return status;
}


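A hedged user-space sketch of the two-call protocol the function above implements: a first call with current_bits_length == 0 is a length query, and a second call fetches the bits. The uvm_test_dump_access_bits_ioctl() wrapper and the params struct below are hypothetical stand-ins; only the fields the kernel code above touches are assumed. Note also the granularity decode: granularity_size_kb = 64 << granularity, so e.g. granularity 2 reports 256 KB regions.

#include <stdint.h>
#include <stdlib.h>

typedef struct {
    uint64_t  current_bits_length; // 0 on the first call -> kernel fills length
    uint64_t *current_bits;        // user buffer for the second call
    uint64_t  granularity_size_kb; // filled by the kernel
    int       mode;
} dump_params_t;

// Hypothetical wrapper around issuing UVM_TEST_DUMP_ACCESS_BITS.
int uvm_test_dump_access_bits_ioctl(int fd, dump_params_t *p);

static uint64_t *dump_access_bits(int fd, int mode, uint64_t *len_out)
{
    dump_params_t p = { .mode = mode };

    // Pass 1: length query; the kernel sets current_bits_length and returns.
    if (uvm_test_dump_access_bits_ioctl(fd, &p) != 0)
        return NULL;

    // Pass 2: fetch the bits into a buffer of the reported length.
    p.current_bits = calloc(p.current_bits_length, sizeof(*p.current_bits));
    if (!p.current_bits || uvm_test_dump_access_bits_ioctl(fd, &p) != 0) {
        free(p.current_bits);
        return NULL;
    }

    *len_out = p.current_bits_length;
    return p.current_bits;
}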
@@ -615,6 +615,7 @@ typedef enum
{
UVM_GPU_LINK_INVALID = 0,
UVM_GPU_LINK_PCIE,
UVM_GPU_LINK_PCIE_BAR1,
UVM_GPU_LINK_NVLINK_1,
UVM_GPU_LINK_NVLINK_2,
UVM_GPU_LINK_NVLINK_3,
@@ -996,6 +997,9 @@ struct uvm_parent_gpu_struct
// Total amount of physical memory available on the parent GPU.
NvU64 max_allocatable_address;

// Access bits buffer information
UvmGpuAccessBitsBufferAlloc vab_info;

#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
uvm_pmm_gpu_devmem_t *devmem;
#endif
@@ -1069,6 +1073,11 @@ struct uvm_parent_gpu_struct

bool access_counters_supported;

// True when HW does not allow mixing different clear types concurrently.
bool access_counters_serialize_clear_ops_by_type;

bool access_bits_supported;

bool fault_cancel_va_supported;

// True if the GPU has hardware support for scoped atomics
@@ -1209,15 +1218,25 @@ struct uvm_parent_gpu_struct
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;

// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
struct
{
// This is only valid if supports_access_counters is set to true. This
// array has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *buffer;
uvm_mutex_t enablement_lock;

// Tracker used to aggregate access counters clear operations, needed for
// GPU removal. It is only used when supports_access_counters is set.
uvm_tracker_t access_counters_clear_tracker;
uvm_mutex_t access_counters_clear_tracker_lock;
// Tracker used to aggregate access counters clear operations, needed
// for GPU removal. It is used when supports_access_counters is set.
uvm_tracker_t clear_tracker;
uvm_mutex_t clear_tracker_lock;

// The following access_counters fields are used when
// access_counters_serialize_clear_ops_by_type is set.
// The serialize_clear_tracker is not the common case, its use is
// decoupled from the clear_tracker (above).
uvm_tracker_t serialize_clear_tracker[UVM_ACCESS_COUNTER_CLEAR_OP_COUNT];
uvm_mutex_t serialize_clear_lock;
} access_counters;

// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
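A stub-level sketch of the consolidation above: the flat parent-GPU fields become one nested struct, so call sites change spelling only (e.g. parent->access_counter_buffer[i] becomes parent->access_counters.buffer[i]). All types are reduced to placeholders.

typedef struct { int dummy; } uvm_tracker_stub_t;
typedef struct { int dummy; } uvm_mutex_stub_t;
typedef struct { int dummy; } uvm_access_counter_buffer_stub_t;

#define CLEAR_OP_COUNT 2

struct parent_gpu_access_counters {
    uvm_access_counter_buffer_stub_t *buffer;  // was access_counter_buffer
    uvm_mutex_stub_t enablement_lock;          // was access_counters_enablement_lock
    uvm_tracker_stub_t clear_tracker;          // was access_counters_clear_tracker
    uvm_mutex_stub_t clear_tracker_lock;       // was access_counters_clear_tracker_lock
    uvm_tracker_stub_t serialize_clear_tracker[CLEAR_OP_COUNT]; // new in this change
    uvm_mutex_stub_t serialize_clear_lock;                      // new in this change
};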
@@ -1335,6 +1354,14 @@ struct uvm_parent_gpu_struct
// only affects ATS systems.
bool no_ats_range_required : 1;

// Page tree initialization requires the initialization of the entire
// depth-0 allocated area, not only the HW supported entry count range.
// The GMMU page table walk cache operates at its own CL granularity
// (32B). We must have an allocated depth-0 page table of at least this
// size, regardless of how many entries are supported by HW.
// The allocation size is determined by MMU HAL allocation_size().
bool gmmu_pt_depth0_init_required : 1;

// See the comments on uvm_dma_map_invalidation_t
uvm_dma_map_invalidation_t dma_map_invalidation;

@@ -1371,7 +1398,7 @@ struct uvm_parent_gpu_struct
struct
{
// Is the GPU directly connected to peer GPUs.
bool is_direct_connected;
bool is_nvlink_direct_connected;

// 48-bit fabric memory physical offset that peer gpus need in order
// for access to be routed to the correct peer.
@@ -1467,6 +1494,22 @@ typedef struct
// iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
NvU8 egm_peer_ids[2];

// IOMMU/DMA mappings of the peer vidmem via bar1. Accesses to this window
// are routed to peer GPU vidmem. The values are provided by RM and RM is
// responsible for creating IOMMU mappings if such mappings are required.
// RM is also responsible for querying PCIe bus topology and determining
// if PCIe atomics are supported between the peers.
// These fields are valid for link type UVM_GPU_LINK_PCIE_BAR1, and the
// address is only valid if size > 0.
// bar1_p2p_dma_base_address[i] provides the DMA window used by GPU[i] to
// access the bar1 region of GPU[1-i].
NvU64 bar1_p2p_dma_base_address[2];
NvU64 bar1_p2p_dma_size[2];

// True if GPU[i] can use PCIe atomic operations when accessing the BAR1
// region of GPU[1-i].
bool bar1_p2p_pcie_atomics_enabled[2];

// The link type between the peer parent GPUs, currently either PCIe or
// NVLINK.
uvm_gpu_link_type_t link_type;
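A standalone sketch of the direction convention documented above: for a peer pair, index i belongs to one direction, and GPU[i] reaches GPU[1-i]'s BAR1 through window i. The range check mirrors gpu_phys_address_is_bar1p2p_peer(); the type name and values are invented.

#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint64_t bar1_p2p_dma_base_address[2];
    uint64_t bar1_p2p_dma_size[2];
    bool     bar1_p2p_pcie_atomics_enabled[2];
} bar1_caps_t;

static bool address_in_bar1_window(const bar1_caps_t *caps, int i, uint64_t addr)
{
    // The base address is only meaningful when the window exists (size > 0).
    return caps->bar1_p2p_dma_size[i] != 0 &&
           addr >= caps->bar1_p2p_dma_base_address[i] &&
           addr < caps->bar1_p2p_dma_base_address[i] + caps->bar1_p2p_dma_size[i];
}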
@@ -1580,9 +1623,10 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)

// Decrease the refcount on the parent GPU object, and actually delete the
// object if the refcount hits zero.
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *parent_gpu);

// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
// waiting for any unfinished trackers contained by the parent GPU.
void uvm_parent_gpu_sync_trackers(uvm_parent_gpu_t *parent_gpu);

static bool uvm_parent_gpu_supports_full_coherence(uvm_parent_gpu_t *parent_gpu)
{
@@ -1591,6 +1635,7 @@ static bool uvm_parent_gpu_supports_full_coherence(uvm_parent_gpu_t *parent_gpu)
return parent_gpu->is_integrated_gpu;
}

// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);

// Either retains an existing PCIe peer entry or creates a new one. In both
@@ -1633,7 +1678,9 @@ uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent

bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);

bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_bar1_peers(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);

bool uvm_parent_gpus_are_nvlink_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);

static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
@@ -1700,7 +1747,7 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// memory, or a BAR1 P2P address
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);


@@ -126,15 +126,15 @@ static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get(uvm_par
{
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
UVM_ASSERT(parent_gpu->access_counter_buffer);
UVM_ASSERT(parent_gpu->access_counters.buffer);

return &parent_gpu->access_counter_buffer[notif_buf_index];
return &parent_gpu->access_counters.buffer[notif_buf_index];
}

static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get_or_null(uvm_parent_gpu_t *parent_gpu,
NvU32 notif_buf_index)
{
if (parent_gpu->access_counter_buffer)
if (parent_gpu->access_counters.buffer)
return parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
return NULL;
}
@@ -221,9 +221,70 @@ static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;

uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
uvm_mutex_lock(&parent_gpu->access_counters.clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters.clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters.clear_tracker_lock);

return status;
}

static NV_STATUS access_counters_push_begin(uvm_gpu_t *gpu, uvm_push_t *push, uvm_access_counter_clear_op_t clear_op)
{
NV_STATUS status;
uvm_tracker_t *pending_clear_op_tracker = NULL;
static const char *push_info_msg[2] = { "Clear access counter: batch",
"Clear access counter: all" };

if (gpu->parent->access_counters_serialize_clear_ops_by_type) {
// The following logic only works when we have 2 clear_op options.
// Otherwise, we would need another way to select the pending clear op
// tracker.
BUILD_BUG_ON(UVM_ACCESS_COUNTER_CLEAR_OP_COUNT != 2);
pending_clear_op_tracker = &gpu->parent->access_counters.serialize_clear_tracker[!clear_op];

// On push_begin (below) success, this lock is released in
// access_counters_push_end();
uvm_mutex_lock(&gpu->parent->access_counters.serialize_clear_lock);
}

// uvm_push_begin_acquire() is converted to uvm_push_begin() when
// pending_clear_op_tracker is NULL. Otherwise, it adds a semaphore acquire
// at the push prologue. The semaphore acquire waits until all pending clear
// ops are finished before processing the different type clear op. The wait
// may be a no-op if there are no pending clear ops in flight.
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
pending_clear_op_tracker,
push,
push_info_msg[clear_op]);
if (status != NV_OK && gpu->parent->access_counters_serialize_clear_ops_by_type)
uvm_mutex_unlock(&gpu->parent->access_counters.serialize_clear_lock);

return status;
}

static NV_STATUS access_counters_push_end(uvm_push_t *push, uvm_access_counter_clear_op_t clear_op)
{
NV_STATUS status = NV_OK;

uvm_push_end(push);

if (push->gpu->parent->access_counters_serialize_clear_ops_by_type) {
uvm_tracker_t *tracker = &push->gpu->parent->access_counters.serialize_clear_tracker[clear_op];

uvm_tracker_remove_completed(tracker);
status = uvm_tracker_add_push_safe(tracker, push);

// This lock is acquired in access_counters_push_begin();
uvm_mutex_unlock(&push->gpu->parent->access_counters.serialize_clear_lock);

if (status != NV_OK)
return status;
}

uvm_mutex_lock(&push->gpu->parent->access_counters.clear_tracker_lock);
uvm_tracker_remove_completed(&push->gpu->parent->access_counters.clear_tracker);
status = uvm_tracker_add_push_safe(&push->gpu->parent->access_counters.clear_tracker, push);
uvm_mutex_unlock(&push->gpu->parent->access_counters.clear_tracker_lock);

return status;
}
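A condensed sketch (not driver code) of the serialization scheme above, for HW that cannot mix clear types in flight. The serialize lock is held from begin to end; the new push first waits on the other type's outstanding pushes, then publishes itself under its own type.

typedef enum { CLEAR_OP_TARGETED = 0, CLEAR_OP_ALL, CLEAR_OP_COUNT } clear_op_t;

// begin(op):
//     lock(serialize_clear_lock);
//     push_begin_acquire(serialize_clear_tracker[other type]); // wait on the
//                                                              // opposite op
//     ... emit clear methods ...
// end(op):
//     push_end();
//     tracker_add(serialize_clear_tracker[op]);                // publish own op
//     unlock(serialize_clear_lock);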
@@ -235,28 +296,19 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf
NV_STATUS status;
uvm_push_t push;

status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Clear access counter: all");
status = access_counters_push_begin(gpu, &push, UVM_ACCESS_COUNTER_CLEAR_OP_ALL);
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);

return status;
}

gpu->parent->host_hal->access_counter_clear_all(&push);

uvm_push_end(&push);

uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);

return status;
return access_counters_push_end(&push, UVM_ACCESS_COUNTER_CLEAR_OP_ALL);
}

// Clear the access counter notifications and add it to the per-GPU clear
@@ -277,26 +329,20 @@ static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,

UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);

status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
status = access_counters_push_begin(gpu, &push, UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);

return status;
}

for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);

uvm_push_end(&push);

uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);

return status;
return access_counters_push_end(&push, UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
}

bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
@@ -603,7 +649,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac

UVM_ASSERT(gpu->parent->access_counters_supported);

uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);

if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_OK;
@@ -631,7 +677,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}

uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);

return status;

@@ -646,7 +692,7 @@ cleanup:
uvm_access_counters_isr_unlock(access_counters);
}

uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);

return status;
}
@@ -703,7 +749,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)

UVM_ASSERT(gpu->parent->access_counters_supported);

uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);

if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
gpu->parent->id)) {
@@ -719,7 +765,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
}
}

uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
}

static void write_get(uvm_access_counter_buffer_t *access_counters, NvU32 get)
@@ -2054,7 +2100,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_release_gpu;
}

uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);

for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
@@ -2072,7 +2118,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);

exit_ac_lock:
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);

exit_release_gpu:
uvm_gpu_release(gpu);
@@ -2101,15 +2147,15 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}

uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);

// Access counters not enabled. Nothing to reset
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
goto exit_release_gpu;
}

uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);

// Clear operations affect all notification buffers, we use the
// notif_buf_index = 0;
@@ -2233,10 +2279,10 @@ NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}

buffer_size = gpu->parent->access_counter_buffer[0].rm_info.bufferSize;
buffer_size = gpu->parent->access_counters.buffer[0].rm_info.bufferSize;

for (index = 1; index < gpu->parent->rm_info.accessCntrBufferCount; index++)
UVM_ASSERT(gpu->parent->access_counter_buffer[index].rm_info.bufferSize == buffer_size);
UVM_ASSERT(gpu->parent->access_counters.buffer[index].rm_info.bufferSize == buffer_size);

params->num_notification_buffers = gpu->parent->rm_info.accessCntrBufferCount;
params->num_notification_entries = buffer_size / gpu->parent->access_counter_buffer_hal->entry_size(gpu->parent);

@@ -183,7 +183,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, N
nv_kref_get(&parent_gpu->gpu_kref);

// Interrupts need to be disabled to avoid an interrupt storm
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
uvm_access_counters_intr_disable(&parent_gpu->access_counters.buffer[notif_buf_index]);

nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
@@ -327,12 +327,12 @@ static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
parent_gpu->access_counters.buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;

nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
&parent_gpu->access_counters.buffer[notif_buf_index]);

// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
@@ -431,9 +431,9 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
parent_gpu->access_counters.buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counters.buffer) *
index_count);
if (!parent_gpu->access_counters.buffer)
return NV_ERR_NO_MEMORY;

if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
@@ -535,8 +535,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);

if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
if (parent_gpu->access_counters.buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counters.buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
@@ -546,7 +546,7 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}

uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
uvm_kvfree(parent_gpu->access_counters.buffer);
}

if (parent_gpu->non_replayable_faults_supported) {

@@ -154,6 +154,7 @@ static uvm_hal_class_ops_t ce_table[] =
.id = HOPPER_DMA_COPY_A,
.parent_id = AMPERE_DMA_COPY_B,
.u.ce_ops = {
.phys_mode = uvm_hal_hopper_ce_phys_mode,
.semaphore_release = uvm_hal_hopper_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_hopper_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
@@ -172,7 +173,9 @@ static uvm_hal_class_ops_t ce_table[] =
{
.id = BLACKWELL_DMA_COPY_A,
.parent_id = HOPPER_DMA_COPY_A,
.u.ce_ops = {},
.u.ce_ops = {
.memcopy_is_valid = uvm_hal_blackwell_ce_memcopy_is_valid,
},
},
{
.id = BLACKWELL_DMA_COPY_B,
@@ -1166,8 +1169,6 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n",
uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s",
uvm_parent_gpu_name(gpu->parent));
}

@@ -348,6 +348,7 @@ void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_hopper_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
@@ -368,6 +369,7 @@ typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t
bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_blackwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation

@@ -50,6 +50,10 @@ typedef enum
// It is directly encoded as SYS_COH in PTEs and CE/esched methods.
UVM_APERTURE_SYS,

// SYS_NON_COHERENT aperture is used when we must prevent PCIe atomics from
// being issued to BAR1 P2P addresses. It's only used to control the use of
// atomics with no other impact on the coherence model.
//
// On platforms that support the GPU coherently caching system memory,
// SYS_NON_COHERENT prevents other clients from snooping the GPU L2 cache.
// This allows noncoherent caching of system memory by GPUs on these
@@ -93,6 +97,11 @@ typedef enum

const char *uvm_aperture_string(uvm_aperture_t aperture);

static bool uvm_aperture_is_sys(uvm_aperture_t aperture)
{
return (aperture == UVM_APERTURE_SYS) || (aperture == UVM_APERTURE_SYS_NON_COHERENT);
}

static bool uvm_aperture_is_peer(uvm_aperture_t aperture)
{
return (aperture >= UVM_APERTURE_PEER_0) && (aperture < UVM_APERTURE_PEER_MAX);
@@ -514,9 +523,9 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)

typedef enum
{
UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0,
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED,
UVM_ACCESS_COUNTER_CLEAR_OP_ALL
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED = 0,
UVM_ACCESS_COUNTER_CLEAR_OP_ALL,
UVM_ACCESS_COUNTER_CLEAR_OP_COUNT,
} uvm_access_counter_clear_op_t;

struct uvm_access_counter_buffer_entry_struct

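Why the enum was renumbered: TARGETED and ALL now densely index arrays (push_info_msg[clear_op], serialize_clear_tracker[clear_op]) and, with exactly two values, !clear_op selects the opposite type. A small self-checking sketch:

#include <assert.h>

typedef enum {
    CLEAR_OP_TARGETED = 0,
    CLEAR_OP_ALL,
    CLEAR_OP_COUNT,
} clear_op_t;

int main(void)
{
    const char *msg[CLEAR_OP_COUNT] = { "batch", "all" };
    assert(msg[!CLEAR_OP_TARGETED] == msg[CLEAR_OP_ALL]);  // !0 == 1
    assert(msg[!CLEAR_OP_ALL] == msg[CLEAR_OP_TARGETED]);  // !1 == 0
    return 0;
}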
@@ -1758,7 +1758,6 @@ static void gpu_chunk_free(uvm_va_block_t *va_block,
return;

UVM_ASSERT(gpu_chunk->va_block == va_block);
UVM_ASSERT(gpu_chunk->va_block_page_index == page_index);

uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
gpu_state->chunks[page_index] = NULL;
@@ -1867,7 +1866,6 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,

if (gpu_state->chunks[page_index] == gpu_chunk) {
UVM_ASSERT(gpu_chunk->va_block == va_block);
UVM_ASSERT(gpu_chunk->va_block_page_index == page_index);
return NV_OK;
}

@@ -1887,11 +1885,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
uvm_processor_mask_set(&va_block->resident, gpu->id);
uvm_page_mask_set(&gpu_state->resident, page_index);

// It is safe to modify the page index field without holding any PMM locks
// because the chunk is allocated, which means that none of the other
// fields in the bitmap can change.
gpu_chunk->va_block = va_block;
gpu_chunk->va_block_page_index = page_index;

gpu_state->chunks[page_index] = gpu_chunk;

@@ -95,6 +95,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->non_replayable_faults_supported = true;

parent_gpu->access_counters_serialize_clear_ops_by_type = false;

parent_gpu->access_bits_supported = false;

parent_gpu->fault_cancel_va_supported = true;

parent_gpu->scoped_atomics_supported = true;
@@ -113,6 +117,8 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->ats.no_ats_range_required = true;

parent_gpu->ats.gmmu_pt_depth0_init_required = false;

// Hopper doesn't prefetch translations for physical requests, so the only
// concern would be if we enabled physical ATS with 4K pages. In that case
// we could see a mix of cached valid and invalid translations in the same

@@ -34,22 +34,68 @@ static NvU32 ce_aperture(uvm_aperture_t aperture)
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));

if (aperture == UVM_APERTURE_SYS) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_SYS_NON_COHERENT) {
// SYS_NON_COHERENT aperture is currently only used for certain
// BAR1 P2P addresses. The use of SYS vs. SYS_NON_COHERENT aperture
// controls the ability to use PCIe atomics to access the BAR1 region.
// The only way to potentially use atomic operations in UVM is a
// semaphore reduction operation.
// Since UVM doesn't use semaphore operations on peer (or physical)
// addresses, it'd be safe to encode SYS_NON_COHERENT aperture as
// COHERENT_SYSMEM for CE methods.
// NONCOHERENT_SYSMEM encoding is used for correctness and potential
// future use of SYS_NON_COHERENT aperture outside of atomics control
// in BAR1 P2P.
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_VID) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
}

// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_hopper_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;

if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);

if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);

if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(C8B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(C8B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(C8B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}

return launch_dma_src_dst_type;
}
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
NV_PUSH_2U(C8B5, OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),

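A compact restatement of ce_aperture()'s decision tree, with the TARGET field spelled out as text instead of HW method encodings. The enum below is a simplified stand-in for uvm_aperture_t; PEER_ID/FLA details are elided.

#include <stdio.h>

typedef enum { AP_SYS, AP_SYS_NON_COHERENT, AP_VID, AP_PEER } ap_t;

static const char *ce_target_name(ap_t ap)
{
    switch (ap) {
    case AP_SYS:              return "COHERENT_SYSMEM";
    case AP_SYS_NON_COHERENT: return "NONCOHERENT_SYSMEM"; // atomics control only
    case AP_VID:              return "LOCAL_FB";
    default:                  return "PEERMEM (+PEER_ID, FLA 0)";
    }
}

int main(void)
{
    printf("%s\n", ce_target_name(AP_SYS_NON_COHERENT));
    return 0;
}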
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2024 NVIDIA Corporation
Copyright (c) 2020-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -121,7 +121,10 @@ static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
return 256;

// depth 0 requires only a 16-byte allocation, but it must be 4k aligned.
// depth 0 requires only a 16-byte allocation to house the 2 entries at the
// 0-depth level. Due to HW constraints, the effective minimum allocation
// size is 32 bytes. It must be 4k aligned.
// See comments in uvm_gpu.h:gmmu_pt_depth0_init_required for details.
return 4096;
}

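Worked numbers for the comment above, using only sizes the comment itself states or implies (16 bytes for 2 depth-0 entries implies 8-byte entries; the walk-cache line is 32 bytes): the 4 KB return value satisfies both the 32-byte HW minimum and the 4 KB alignment requirement.

#include <assert.h>

int main(void)
{
    const unsigned depth0_entries  = 2;
    const unsigned entry_bytes     = 8;   // derived: 16 B total / 2 entries
    const unsigned walk_cache_line = 32;  // GMMU walk-cache CL granularity
    const unsigned alloc_bytes     = 4096;

    assert(depth0_entries * entry_bytes == 16); // logical need
    assert(alloc_bytes >= walk_cache_line);     // HW minimum honored
    assert(alloc_bytes % 4096 == 0);            // 4k alignment
    return 0;
}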
@@ -250,7 +253,7 @@ static NvU64 unmapped_pte_hopper(NvU64 page_size)
HWCONST64(_MMU_VER3, PTE, PCF, NO_VALID_4KB_PAGE);
}

static NvU64 poisoned_pte_hopper(void)
static NvU64 poisoned_pte_hopper(uvm_page_tree_t *tree)
{
// An invalid PTE won't be fatal from faultable units like SM, which is the
// most likely source of bad PTE accesses.
@@ -264,7 +267,7 @@ static NvU64 poisoned_pte_hopper(void)
// be aligned to page_size.
NvU64 phys_addr = 0x2bad000000ULL;

NvU64 pte_bits = make_pte_hopper(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
}

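Why the signature gained a tree parameter: the poisoned PTE is now built through tree->hal->make_pte instead of calling make_pte_hopper directly, so an MMU HAL derived from Hopper's (with its own make_pte) can reuse this routine unchanged. A minimal sketch of that indirection follows; the poison flag bit is hypothetical.

#include <stdint.h>

typedef struct page_tree_hal {
    uint64_t (*make_pte)(uint64_t phys_addr);
} page_tree_hal_t;

typedef struct page_tree {
    const page_tree_hal_t *hal;
} page_tree_t;

static uint64_t poisoned_pte(page_tree_t *tree)
{
    // Use the same PTE builder the rest of this tree uses, whichever
    // architecture owns it; then mark it (hypothetical poison bit).
    return tree->hal->make_pte(0x2bad000000ULL) | 1u;
}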
@@ -380,10 +383,10 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_
pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
@@ -415,10 +418,10 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_pag
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
@@ -453,10 +456,10 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_p
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);

@@ -147,40 +147,6 @@ typedef struct
    NV_STATUS rmStatus; // OUT
} UVM_SET_STREAM_STOPPED_PARAMS;

//
// UvmCallTestFunction
//
#define UVM_RUN_TEST UVM_IOCTL_BASE(9)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvU32 test;              // IN
    struct
    {
        NvProcessorUuid peerGpuUuid; // IN
        NvU32 peerId;                // IN
    } multiGpu;
    NV_STATUS rmStatus;      // OUT
} UVM_RUN_TEST_PARAMS;

//
// This is a magic offset for mmap. Any mapping of an offset above this
// threshold will be treated as a counters mapping, not as an allocation
// mapping. Since allocation offsets must be identical to the virtual address
// of the mapping, this threshold has to be an offset that cannot be
// a valid virtual address.
//
#if defined(__linux__)
#if defined(NV_64_BITS)
#define UVM_EVENTS_OFFSET_BASE (1UL << 63)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 62)
#else
#define UVM_EVENTS_OFFSET_BASE (1UL << 31)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 30)
#endif
#endif // defined(__linux___)
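
The removed offset bases encode a simple dispatch rule: an mmap offset that can
never be a canonical user virtual address selects the counters/events path. A
minimal sketch of that rule, with hypothetical handler and helper names
(uvm_mmap_dispatch, map_events, map_counters, map_allocation):

static int uvm_mmap_dispatch(struct file *filp, struct vm_area_struct *vma)
{
    // vm_pgoff is in pages; recover the byte offset passed to mmap().
    NvU64 offset = (NvU64)vma->vm_pgoff << PAGE_SHIFT;

    // Offsets at or above these bases cannot be valid mapping virtual
    // addresses, so they unambiguously select the events/counters paths.
    if (offset >= UVM_EVENTS_OFFSET_BASE)
        return map_events(filp, vma);       // hypothetical helper
    if (offset >= UVM_COUNTERS_OFFSET_BASE)
        return map_counters(filp, vma);     // hypothetical helper

    // Allocation mappings require the offset to equal the mapping's VA.
    return map_allocation(filp, vma);       // hypothetical helper
}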

//
// UvmAddSession
//
@@ -189,27 +155,9 @@ typedef struct
typedef struct
{
    NvU32 pidTarget; // IN
#ifdef __linux__
    NvP64 countersBaseAddress NV_ALIGN_BYTES(8); // IN
    NvS32 sessionIndex;                          // OUT (session index that got added)
#endif
    NV_STATUS rmStatus; // OUT
} UVM_ADD_SESSION_PARAMS;

//
// UvmRemoveSession
//
#define UVM_REMOVE_SESSION UVM_IOCTL_BASE(11)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN (session index to be removed)
#endif
    NV_STATUS rmStatus; // OUT
} UVM_REMOVE_SESSION_PARAMS;


#define UVM_MAX_COUNTERS_PER_IOCTL_CALL 32

//
@@ -219,9 +167,6 @@ typedef struct

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    UvmCounterConfig config[UVM_MAX_COUNTERS_PER_IOCTL_CALL]; // IN
    NvU32 count;        // IN
    NV_STATUS rmStatus; // OUT
@@ -234,9 +179,6 @@ typedef struct

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 scope;             // IN (UvmCounterScope)
    NvU32 counterName;       // IN (UvmCounterName)
    NvProcessorUuid gpuUuid; // IN
@@ -251,15 +193,10 @@ typedef struct

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex;                      // OUT
    NvU64 queueSize NV_ALIGN_BYTES(8);          // IN
    NvU64 notificationCount NV_ALIGN_BYTES(8);  // IN
#if defined(WIN32) || defined(WIN64)
    NvU64 notificationHandle NV_ALIGN_BYTES(8); // IN
#endif
    NvU32 timeStampType; // IN (UvmEventTimeStampType)
    NV_STATUS rmStatus;  // OUT
} UVM_CREATE_EVENT_QUEUE_PARAMS;
@@ -271,9 +208,6 @@ typedef struct

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex; // IN
    NV_STATUS rmStatus;    // OUT
} UVM_REMOVE_EVENT_QUEUE_PARAMS;
@@ -285,9 +219,6 @@ typedef struct

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex;                  // IN
    NvP64 userRODataAddr NV_ALIGN_BYTES(8); // IN
    NvP64 userRWDataAddr NV_ALIGN_BYTES(8); // IN
@@ -304,39 +235,12 @@ typedef struct

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex; // IN
    NvS32 eventType;       // IN
    NvU32 enable;          // IN
    NV_STATUS rmStatus;    // OUT
} UVM_EVENT_CTRL_PARAMS;

//
// UvmRegisterMpsServer
//
#define UVM_REGISTER_MPS_SERVER UVM_IOCTL_BASE(18)

typedef struct
{
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
    NvU32 numGpus;                                 // IN
    NvU64 serverId NV_ALIGN_BYTES(8);              // OUT
    NV_STATUS rmStatus;                            // OUT
} UVM_REGISTER_MPS_SERVER_PARAMS;

//
// UvmRegisterMpsClient
//
#define UVM_REGISTER_MPS_CLIENT UVM_IOCTL_BASE(19)

typedef struct
{
    NvU64 serverId NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;               // OUT
} UVM_REGISTER_MPS_CLIENT_PARAMS;

//
// UvmEventGetGpuUuidTable
//
@@ -442,7 +346,6 @@ typedef struct

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvHandle hClient;        // IN
    NvHandle hChannel;       // IN
    NV_STATUS rmStatus;      // OUT
@@ -510,7 +413,6 @@ typedef struct
typedef struct
{
    NvU64 base NV_ALIGN_BYTES(8);   // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;             // OUT
} UVM_FREE_PARAMS;

@@ -1128,8 +1030,8 @@ typedef struct
typedef struct
{
    NvU64 base NV_ALIGN_BYTES(8);   // IN
    NvU64 length;                   // IN
    NvU64 flags;                    // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvU64 flags NV_ALIGN_BYTES(8);  // IN
    NV_STATUS rmStatus;             // OUT
} UVM_DISCARD_PARAMS;
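
The UVM_DISCARD_PARAMS change adds NV_ALIGN_BYTES(8) so length and flags keep
the same offsets whether the caller is 32-bit or 64-bit user space talking to a
64-bit kernel. A sketch of the kind of compile-time layout check such ioctl ABI
structs admit (the struct name and assert are illustrative, not from the
driver):

#include <stddef.h>

typedef struct
{
    NvU64 base NV_ALIGN_BYTES(8);   // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvU64 flags NV_ALIGN_BYTES(8);  // IN
    NV_STATUS rmStatus;             // OUT
} EXAMPLE_DISCARD_PARAMS;

// Without the alignment attributes a 32-bit compiler may place 64-bit fields
// on 4-byte boundaries, shifting every later member relative to the 64-bit
// kernel's view of the same struct.
_Static_assert(offsetof(EXAMPLE_DISCARD_PARAMS, flags) == 16, "ABI drift");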

@@ -40,10 +40,6 @@
#include "nv-linux.h"

#include <linux/log2.h>
#if defined(NV_PRIO_TREE_PRESENT)
#include <linux/prio_tree.h>
#endif

#include <linux/jhash.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
@@ -117,21 +113,14 @@
#define NVIDIA_UVM_PRETTY_PRINTING_PREFIX "nvidia-uvm: "
#define pr_fmt(fmt) NVIDIA_UVM_PRETTY_PRINTING_PREFIX fmt

// Dummy printing function that maintains syntax and format specifier checking
// but doesn't print anything and doesn't evaluate the print parameters. This is
// roughly equivalent to the kernel's no_printk function. We use this instead
// because:
// 1) no_printk was not available until 2.6.36
// 2) Until 4.5 no_printk was implemented as a static function, meaning its
//    parameters were always evaluated
#define UVM_NO_PRINT(fmt, ...)          \
    do {                                \
        if (0)                          \
            printk(fmt, ##__VA_ARGS__); \
    } while (0)
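
The `if (0)` body keeps the format string and arguments type-checked while
guaranteeing neither is evaluated at run time; a short usage sketch
(expensive_summary() is a hypothetical, side-effecting helper):

// Never calls expensive_summary(), yet the compiler still verifies that
// "%s" matches its char * result. A macro that simply expanded to nothing
// would silently accept mismatched formats and arguments.
UVM_NO_PRINT("state: %s\n", expensive_summary());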

#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)

#if defined(NV_GET_DEV_PAGEMAP_HAS_PGMAP_ARG)
#define NV_GET_DEV_PAGEMAP(pfn) get_dev_pagemap(pfn, NULL)
#else
#define NV_GET_DEV_PAGEMAP get_dev_pagemap
#endif

/* Return a nanosecond-precise value */
static inline NvU64 NV_GETTIME(void)
{
@@ -141,25 +130,6 @@ static inline NvU64 NV_GETTIME(void)
    return (NvU64) timespec64_to_ns(&tm);
}

// atomic_long_read_acquire and atomic_long_set_release were added in commit
// b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb ("locking/atomics: Switch to
// generated atomic-long") in v5.1 (2019-05-05).
// TODO: Bug 3849079: We always use these definitions on newer kernels.
#define atomic_long_read_acquire uvm_atomic_long_read_acquire
static inline long uvm_atomic_long_read_acquire(atomic_long_t *p)
{
    long val = atomic_long_read(p);
    smp_mb();
    return val;
}

#define atomic_long_set_release uvm_atomic_long_set_release
static inline void uvm_atomic_long_set_release(atomic_long_t *p, long v)
{
    smp_mb();
    atomic_long_set(p, v);
}
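
These fallbacks give plain atomic_long_t accesses the ordering contract of the
v5.1+ primitives: the release store publishes earlier writes, and a pairing
acquire load observes them. A minimal producer/consumer sketch with
illustrative names:

static atomic_long_t g_ready;   // illustrative flag
static long g_payload;          // illustrative data

static void producer(void)
{
    g_payload = 42;                        // plain store...
    atomic_long_set_release(&g_ready, 1);  // ...published by the release
}

static void consumer(void)
{
    // The acquire load pairs with the release store above, so a reader that
    // sees g_ready == 1 is guaranteed to also see g_payload == 42.
    if (atomic_long_read_acquire(&g_ready))
        WARN_ON(g_payload != 42);
}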

typedef struct
{
    struct mem_cgroup *new_memcg;

@@ -407,6 +407,11 @@
// Operations allowed while holding this lock
// - Pushing work to SEC2 channels
//
// - Access counters clear operations
//   Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
//   It protects the parent_gpu's access counters clear tracker.
//
// - Concurrent push semaphore
//   Order: UVM_LOCK_ORDER_PUSH
//   Semaphore (uvm_semaphore_t)
@@ -432,11 +437,6 @@
//   Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
//   Exclusive bitlock (mutex) per each root chunk internal to PMM.
//
// - Access counters clear operations
//   Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
//   It protects the parent_gpu's access counters clear tracker.
//
// - Channel lock
//   Order: UVM_LOCK_ORDER_CHANNEL
//   Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
@@ -524,11 +524,11 @@ typedef enum
    UVM_LOCK_ORDER_KEY_ROTATION_WLC,
    UVM_LOCK_ORDER_CSL_WLC_PUSH,
    UVM_LOCK_ORDER_CSL_SEC2_PUSH,
    UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
    UVM_LOCK_ORDER_PUSH,
    UVM_LOCK_ORDER_PMM,
    UVM_LOCK_ORDER_PMM_PMA,
    UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
    UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
    UVM_LOCK_ORDER_CHANNEL,
    UVM_LOCK_ORDER_WLC_CHANNEL,
    UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,
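
Moving UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS ahead of UVM_LOCK_ORDER_PUSH matters
because the enum's numeric order is what lock correctness checking enforces:
locks must be acquired in increasing order. A simplified sketch of such a check
(illustrative, not the driver's actual tracker):

// Highest-order lock currently held by this thread (illustrative tracking).
static int highest_held_order = -1;

static void check_lock_order(int new_order)
{
    // Taking locks only in increasing enum order rules out A/B vs B/A
    // deadlocks. After this reordering, the access counters clear lock must
    // be taken before starting a push, never while one is in flight.
    BUG_ON(new_order <= highest_held_order);
    highest_held_order = new_order;
}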

@@ -61,6 +61,10 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = false;

    parent_gpu->access_counters_serialize_clear_ops_by_type = false;

    parent_gpu->access_bits_supported = false;

    parent_gpu->fault_cancel_va_supported = false;

    parent_gpu->scoped_atomics_supported = false;
@@ -75,5 +79,7 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->ats.no_ats_range_required = false;

    parent_gpu->ats.gmmu_pt_depth0_init_required = false;

    parent_gpu->conf_computing.per_channel_key_rotation = false;
}

@@ -268,7 +268,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
    // Check if the copy is over NVLINK and simulate dropped traffic if there's
    // an NVLINK error.
    // Src address cannot be peer as that wouldn't pass the valid check above.
    if (uvm_gpu_address_is_peer(gpu, dst) && uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK)
    if (uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK && uvm_gpu_address_is_peer(gpu, dst))
        size = 0;

    gpu->parent->ce_hal->memcopy_patch_src(push, &src);

@@ -352,7 +352,7 @@ uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *pare
                                                          NvU32 num_entries)
{
    UVM_ASSERT_MSG(false, "host access_counter_query_clear_op called on Maxwell GPU\n");
    return UVM_ACCESS_COUNTER_CLEAR_OP_NONE;
    return UVM_ACCESS_COUNTER_CLEAR_OP_COUNT;
}

NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu)

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation
    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -253,7 +253,7 @@ static NvU64 make_pte_maxwell(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
    else
        pte_bits |= HWCONST64(_MMU, PTE, VOL, TRUE);

    // aperture 34:32
    // aperture 34:33
    if (aperture == UVM_APERTURE_SYS)
        aperture_bits = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
    else if (aperture == UVM_APERTURE_VID)
@@ -296,7 +296,7 @@ static NvU64 make_sked_reflected_pte_maxwell(void)
    return pte_bits;
}

static NvU64 poisoned_pte_maxwell(void)
static NvU64 poisoned_pte_maxwell(uvm_page_tree_t *tree)
{
    // An invalid PTE is also fatal on Maxwell, but a PRIV violation will
    // immediately identify bad PTE usage.
@@ -309,7 +309,7 @@ static NvU64 poisoned_pte_maxwell(void)
    // This address has to fit within 37 bits (max address width of vidmem) and
    // be aligned to page_size.
    NvU64 phys_addr = 0x1bad000000ULL;
    NvU64 pte_bits = make_pte_maxwell(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
    NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);

    return WRITE_HWCONST64(pte_bits, _MMU, PTE, PRIVILEGE, TRUE);
}
@@ -317,8 +317,13 @@ static NvU64 poisoned_pte_maxwell(void)
// Sparse mappings are not supported.
static NvU64 make_sparse_pte_maxwell_unsupported(void)
{
    NvU64 pte_bits;

    UVM_ASSERT_MSG(0, "Sparse mappings unsupported on pre-Pascal GPUs\n");
    return poisoned_pte_maxwell();

    pte_bits = HWCONST64(_MMU, PTE, VALID, FALSE);

    return pte_bits;
}

static uvm_mmu_mode_hal_t maxwell_64_mmu_mode_hal =

@@ -588,7 +588,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
                                                 service_context->block_context->make_resident.dest_nid))
                skipped_migrate = true;
        }
        else if (uvm_processor_mask_test(&managed_range->va_range.uvm_lite_gpus, dest_id) &&
        else if (uvm_processor_mask_test(&managed_range->uvm_lite_gpus, dest_id) &&
                 !uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
            // Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
            // unless it's the preferred location

@@ -43,8 +43,6 @@

#ifdef UVM_MIGRATE_VMA_SUPPORTED

static struct kmem_cache *g_uvm_migrate_vma_state_cache __read_mostly;

static const gfp_t g_migrate_vma_gfp_flags = NV_UVM_GFP_FLAGS | GFP_HIGHUSER_MOVABLE | __GFP_THISNODE;

static uvm_sgt_t *uvm_select_sgt(uvm_processor_id_t src_id, int src_nid, migrate_vma_state_t *state)
@@ -1497,7 +1495,7 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
        uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu);
    }

    state = nv_kmem_cache_zalloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);
    state = uvm_kvmalloc_zero(sizeof(migrate_vma_state_t));
    if (!state)
        return NV_ERR_NO_MEMORY;

@@ -1519,22 +1517,17 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
out:
    uvm_kvfree(state->dma.sgt_cpu);
    uvm_kvfree(state->cpu_page_mask);
    kmem_cache_free(g_uvm_migrate_vma_state_cache, state);
    uvm_kvfree(state);

    return status;
}

NV_STATUS uvm_migrate_pageable_init(void)
{
    g_uvm_migrate_vma_state_cache = NV_KMEM_CACHE_CREATE("migrate_vma_state_t", migrate_vma_state_t);
    if (!g_uvm_migrate_vma_state_cache)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

void uvm_migrate_pageable_exit(void)
{
    kmem_cache_destroy_safe(&g_uvm_migrate_vma_state_cache);
}
#endif

@@ -152,9 +152,10 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
}

// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID              biggest page size on vidmem mappings
// - UVM_APERTURE_SYS              biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7         biggest page size on peer mappings
// - UVM_APERTURE_VID              biggest page size on vidmem mappings
// - UVM_APERTURE_SYS              biggest page size on sysmem mappings
// - UVM_APERTURE_SYS_NON_COHERENT biggest page size on BAR1 mappings
// - UVM_APERTURE_PEER_0-7         biggest page size on peer mappings
static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
    UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
@@ -306,7 +307,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
    }
    else {
        NvU64 page_offset = offset_in_page(phys_alloc->addr.address);
        return kmap(uvm_mmu_page_table_page(gpu, phys_alloc)) + page_offset;
        return (char *)kmap(uvm_mmu_page_table_page(gpu, phys_alloc)) + page_offset;
    }
}

@@ -392,7 +393,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
    NvU64 pde_data[2], entry_size;
    uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
    NvU32 max_inline_entries;
    uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
    uvm_membar_t push_membar;
    uvm_gpu_address_t inline_data_addr;
    uvm_push_inline_data_t inline_data;
    NvU32 entry_count, i, j;
@@ -403,12 +404,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
    UVM_ASSERT(sizeof(pde_data) >= entry_size);

    max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;

    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
        push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
    else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
        push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;

    push_membar = uvm_push_get_and_reset_membar_flag(push);
    pde_entry_addr.address += start_index * entry_size;

    for (i = 0; i < pde_count;) {
@@ -420,11 +416,11 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
        entry_count = min(pde_count - i, max_inline_entries);

        // No membar is needed until the last memory operation. Otherwise,
        // use caller's membar flag.
        // use caller's membar.
        if ((i + entry_count) < pde_count)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
        else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
            uvm_push_set_flag(push, push_membar_flag);
            uvm_push_set_membar(push, UVM_MEMBAR_NONE);
        else
            uvm_push_set_membar(push, push_membar);

        uvm_push_inline_data_begin(push, &inline_data);
        for (j = 0; j < entry_count; j++) {
@@ -458,6 +454,16 @@ static void pde_fill(uvm_page_tree_t *tree,
        pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
}

static void phys_mem_init_memset(uvm_gpu_t *gpu, uvm_push_t *push, uvm_page_directory_t *dir, NvU64 value)
{
    NvU64 size = dir->phys_alloc.size;

    if (push)
        gpu->parent->ce_hal->memset_8(push, uvm_mmu_gpu_address(push->gpu, dir->phys_alloc.addr), value, size);
    else
        uvm_mmu_page_table_cpu_memset_8(gpu, &dir->phys_alloc, 0, value, size / sizeof(value));
}

static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
{
    NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
@@ -490,24 +496,38 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_direc
    }

        // Initialize the memory to a reasonable value.
        if (push) {
            tree->gpu->parent->ce_hal->memset_8(push,
                                                uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
                                                *clear_bits,
                                                dir->phys_alloc.size);
        }
        else {
            uvm_mmu_page_table_cpu_memset_8(tree->gpu,
                                            &dir->phys_alloc,
                                            0,
                                            *clear_bits,
                                            dir->phys_alloc.size / sizeof(*clear_bits));
        }
        phys_mem_init_memset(tree->gpu, push, dir, *clear_bits);
    }
    else {
        // Initialize the entire directory allocated page table area due to Bug
        // 5282495. See comment in ats.gmmu_pt_depth0_init_required declaration.
        if (dir->depth == 0 && tree->gpu->parent->ats.gmmu_pt_depth0_init_required) {
            uvm_membar_t push_membar;

            // Retrieve and store the caller's membar, since
            // phys_mem_init_memset() will consume it.
            if (push) {
                push_membar = uvm_push_get_and_reset_membar_flag(push);

                // No membar is required, pde_fill() will push the caller's
                // membar.
                uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            }

            // phys_mem_init_memset() consumes and resets the CE's push pipeline
            // flag, which is required to avoid WaW issues since pde_fill()
            // will write to the same range and its first operation is not
            // pipelined.
            phys_mem_init_memset(tree->gpu, push, dir, 0);

            if (push) {
                // Restore the caller's membar for pde_fill().
                uvm_push_set_membar(push, push_membar);
            }
        }

        pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
    }

}

static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
@@ -1671,7 +1691,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,

    tree->gpu->parent->ce_hal->memset_8(&push,
                                        uvm_mmu_gpu_address(tree->gpu, pte_dir->phys_alloc.addr),
                                        tree->hal->poisoned_pte(),
                                        tree->hal->poisoned_pte(tree),
                                        pte_dir->phys_alloc.size);

    // If both the new PTEs and the parent PDE are in vidmem, then a GPU-
@@ -2388,23 +2408,21 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
    uvm_aperture_t aperture;
    NvU64 phys_offset;
    uvm_gpu_identity_mapping_t *peer_mapping;
    uvm_gpu_phys_address_t phys_address;

    UVM_ASSERT(gpu->parent->peer_copy_mode < UVM_GPU_PEER_COPY_MODE_COUNT);

    if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
        return NV_OK;

    aperture = uvm_gpu_peer_aperture(gpu, peer);
    // Use transformation of address 0 to get offset and aperture for all
    // other addresses.
    phys_address = uvm_gpu_peer_phys_address(peer, 0, gpu);
    aperture = phys_address.aperture;
    phys_offset = phys_address.address;
    page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
    size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
    peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
    phys_offset = 0ULL;

    if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, peer->parent)) {
        // Add the 47-bit physical address routing bits for this peer to the
        // generated PTEs
        phys_offset = peer->parent->nvswitch_info.fabric_memory_window_start;
    }

    UVM_ASSERT(page_size);
    UVM_ASSERT(size);
@@ -2983,16 +3001,12 @@ NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture)
                                    UVM_CHANNEL_TYPE_MEMOPS,
                                    &push,
                                    "L2 cache invalidate");
    if (status != NV_OK) {
        UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status));
    if (status != NV_OK)
        return status;
    }

    gpu->parent->host_hal->l2_invalidate(&push, aperture);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        UVM_ERR_PRINT("ERROR: L2 cache invalidation: Failed to complete push, status: %s\n", nvstatusToString(status));

    return status;
}

@@ -217,7 +217,7 @@ struct uvm_mmu_mode_hal_struct
    // Bit pattern used for debug purposes to clobber PTEs which ought to be
    // unused. In practice this will generate a PRIV violation or a physical
    // memory out-of-range error so we can immediately identify bad PTE usage.
    NvU64 (*poisoned_pte)(void);
    NvU64 (*poisoned_pte)(uvm_page_tree_t *tree);

    // Write a PDE bit-pattern to entry based on the data in allocs (which may
    // point to two items for dual PDEs).
@@ -228,7 +228,7 @@ struct uvm_mmu_mode_hal_struct
    // invalid/clean PDE entries.
    void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, uvm_page_directory_t *dir, NvU32 child_index);

    // size of an entry in a directory/table. Generally either 8 or 16 bytes.
    // size of an entry in a directory/table. Generally either 8 or 16 bytes.
    // (in the case of Pascal dual PDEs)
    NvLength (*entry_size)(NvU32 depth);
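
Threading uvm_page_tree_t through poisoned_pte() lets every implementation
derive the poison pattern from the tree's own make_pte() encoder instead of a
hard-wired per-arch helper, as the .c changes above show. The call-site shape,
as a minimal sketch:

static NvU64 debug_clobber_pattern(uvm_page_tree_t *tree)
{
    // The HAL picks the PTE encoding that matches the tree's MMU format,
    // so the poisoned pattern stays valid as page table formats evolve.
    return tree->hal->poisoned_pte(tree);
}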

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -44,6 +44,10 @@
#include "clc36f.h"
#include "clc3b5.h"

// TURING_*
#include "clc46f.h"
#include "clc5b5.h"

// AMPERE_*
#include "clc56f.h"
#include "clc6b5.h"
@@ -1783,11 +1787,16 @@ static NV_STATUS entry_test_page_size_volta(uvm_gpu_t *gpu, size_t page_size)
    return entry_test_page_size_pascal(gpu, page_size);
}

static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
static NV_STATUS entry_test_page_size_turing(uvm_gpu_t *gpu, size_t page_size)
{
    return entry_test_page_size_volta(gpu, page_size);
}

static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
{
    return entry_test_page_size_turing(gpu, page_size);
}

static NV_STATUS entry_test_page_size_hopper(uvm_gpu_t *gpu, size_t page_size)
{
    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
@@ -1810,11 +1819,13 @@ typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size)

static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
    NvU64 pde_bits;
    uvm_mmu_page_table_alloc_t *phys_allocs[2];
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
    uvm_page_tree_t tree;
    uvm_mmu_mode_hal_t *hal;
    uvm_page_directory_t dir;
    NvU64 big_page_size, page_size;
@@ -1895,19 +1906,30 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
                                  0x1BBBBBB000LL,
                                  UVM_PROT_READ_ONLY,
                                  UVM_MMU_PTE_FLAGS_CACHED) == 0x80000002FBBBBBB5LL);

        TEST_NV_CHECK_RET(test_page_tree_init(gpu, big_page_size, &tree));
        TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x800000011bad0007ull, cleanup_tree);
        uvm_page_tree_deinit(&tree);
    }

    return NV_OK;

cleanup_tree:
    uvm_page_tree_deinit(&tree);

    return status;
}

static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
    NV_STATUS status = NV_OK;
    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU64 pde_bits[2];
    size_t i, num_page_sizes;
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
    uvm_page_tree_t tree;
    uvm_page_directory_t dir;

    // big versions have [11:8] set as well to test the page table merging
@@ -1993,7 +2015,16 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
    for (i = 0; i < num_page_sizes; i++)
        TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));

    TEST_NV_CHECK_RET(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree));
    TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x1bad000e9ull, cleanup_tree);
    uvm_page_tree_deinit(&tree);

    return NV_OK;

cleanup_tree:
    uvm_page_tree_deinit(&tree);

    return status;
}

static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
@@ -2070,6 +2101,30 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
    return NV_OK;
}

static NV_STATUS entry_test_turing(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
    NV_STATUS status = NV_OK;
    uvm_page_tree_t tree;
    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU32 i, num_page_sizes;

    num_page_sizes = get_page_sizes(gpu, page_sizes);

    for (i = 0; i < num_page_sizes; i++)
        TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));

    TEST_NV_CHECK_RET(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree));
    TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x6000001bad000e9ull, cleanup_tree);
    uvm_page_tree_deinit(&tree);

    return NV_OK;

cleanup_tree:
    uvm_page_tree_deinit(&tree);

    return status;
}

static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
@@ -2093,6 +2148,7 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
    uvm_page_tree_t tree;

    // Big versions have [11:8] set as well to test the page table merging
    uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
@@ -2101,6 +2157,7 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);

    memset(dirs, 0, sizeof(dirs));

    // Fake directory tree.
    for (i = 0; i < ARRAY_SIZE(dirs); i++) {
        dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
@@ -2272,10 +2329,19 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
    // sked reflected
    TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF0F, cleanup);

    // poisoned - use a fake tree as it is required by poisoned_pte's MMU HAL.
    // The tests above manually set the MMU HAL but used functions that don't
    // have a uvm_page_tree_t argument.
    TEST_NV_CHECK_GOTO(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree), cleanup);
    TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x2bad0006f9ull, cleanup_tree);

    num_page_sizes = get_page_sizes(gpu, page_sizes);

    for (i = 0; i < num_page_sizes; i++)
        TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);
        TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup_tree);

cleanup_tree:
    uvm_page_tree_deinit(&tree);

cleanup:
    for (i = 0; i < ARRAY_SIZE(dirs); i++)
@@ -2286,6 +2352,9 @@ cleanup:

static NV_STATUS entry_test_blackwell(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
    // We use entry_test_ampere() because we only want to check for an
    // additional page size, no MMU page table format changes between Hopper and
    // Blackwell.
    return entry_test_ampere(gpu, entry_test_page_size_blackwell);
}

@@ -2523,6 +2592,15 @@ static NV_STATUS fake_gpu_init_volta(uvm_gpu_t *fake_gpu)
                         fake_gpu);
}

static NV_STATUS fake_gpu_init_turing(uvm_gpu_t *fake_gpu)
{
    return fake_gpu_init(TURING_CHANNEL_GPFIFO_A,
                         TURING_DMA_COPY_A,
                         NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
                         0,
                         fake_gpu);
}

static NV_STATUS fake_gpu_init_ampere(uvm_gpu_t *fake_gpu)
{
    return fake_gpu_init(AMPERE_CHANNEL_GPFIFO_A,
@@ -2641,6 +2719,15 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
    return NV_OK;
}

static NV_STATUS turing_test_page_tree(uvm_gpu_t *turing)
{
    TEST_CHECK_RET(fake_gpu_init_turing(turing) == NV_OK);

    MEM_NV_CHECK_RET(entry_test_turing(turing, entry_test_page_size_turing), NV_OK);

    return NV_OK;
}

static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
{
    NvU32 i, tlb_batch_saved_max_pages;
@@ -2803,6 +2890,7 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
    TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(turing_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(hopper_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(blackwell_test_page_tree(gpu), done);

@@ -90,6 +90,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = false;

    parent_gpu->access_counters_serialize_clear_ops_by_type = false;

    parent_gpu->access_bits_supported = false;

    parent_gpu->fault_cancel_va_supported = false;

    parent_gpu->scoped_atomics_supported = false;
@@ -104,5 +108,7 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->ats.no_ats_range_required = false;

    parent_gpu->ats.gmmu_pt_depth0_init_required = false;

    parent_gpu->conf_computing.per_channel_key_rotation = false;
}

@@ -297,7 +297,6 @@ static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
        // vid address 32:8
        pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID, address);


        // peer id 35:33
        if (aperture != UVM_APERTURE_VID)
            pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID_PEER, UVM_APERTURE_PEER_ID(aperture));
@@ -306,6 +305,7 @@ static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
        pte_bits |= HWVALUE64(_MMU_VER2, PTE, COMPTAGLINE, 0);
    }

    // kind 63:56
    pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_PITCH);

    return pte_bits;
@@ -327,7 +327,7 @@ static NvU64 make_sparse_pte_pascal(void)
           HWCONST64(_MMU_VER2, PTE, VOL, TRUE);
}

static NvU64 poisoned_pte_pascal(void)
static NvU64 poisoned_pte_pascal(uvm_page_tree_t *tree)
{
    // An invalid PTE won't be fatal from faultable units like SM, which is the
    // most likely source of bad PTE accesses.
@@ -340,7 +340,7 @@ static NvU64 poisoned_pte_pascal(void)
    // be aligned to page_size.
    NvU64 phys_addr = 0x1bad000000ULL;

    NvU64 pte_bits = make_pte_pascal(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
    NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
    return WRITE_HWCONST64(pte_bits, _MMU_VER2, PTE, PRIVILEGE, TRUE);
}


@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2024 NVIDIA Corporation
    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -318,6 +318,60 @@ static void compute_prefetch_mask(uvm_va_block_region_t faulted_region,
    }
}

// Determine whether prefetching should be applied for the given migration.
//
// This function evaluates multiple conditions to decide if prefetching is
// beneficial:
//
// 1. No preferred location policy: Always allow prefetching when no preferred
//    location is set, as there are no policy constraints.
//
// 2. Moving to preferred location: Always allow prefetching when migrating
//    toward the preferred location, as this aligns with the policy.
//
// 3. Confidential Computing exceptions: In CC environments, remote memory
//    mapping is not always available, forcing memory migration. Allow
//    prefetching out of the preferred location to facilitate these flows
//    and improve performance:
//    - DtoH transfers (migrating to CPU/sysmem)
//    - HtoD transfers when pages are already resident on CPU
//
// Returns true if prefetching logic should be applied, false otherwise.
static bool should_apply_prefetch_logic(const uvm_va_policy_t *policy,
                                        uvm_processor_id_t new_residency,
                                        uvm_va_block_t *va_block,
                                        uvm_va_block_context_t *va_block_context,
                                        const uvm_page_mask_t *faulted_pages)
{
    // No preferred location set - always allow prefetching
    if (!UVM_ID_IS_VALID(policy->preferred_location))
        return true;

    // Moving to preferred location - always allow prefetching
    if (uvm_id_equal(policy->preferred_location, new_residency))
        return true;

    // CC sysmem exception logic - allow prefetching out of preferred location
    // for CC-related sysmem transfers when remote mapping is not available
    if (!g_uvm_global.conf_computing_enabled)
        return false;

    // DtoH: migrating to CPU/sysmem
    if (UVM_ID_IS_CPU(new_residency))
        return true;

    // HtoD: check if any faulted pages are currently resident on CPU
    if (UVM_ID_IS_GPU(new_residency)) {
        const uvm_page_mask_t *cpu_resident_mask = uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE);
        if (cpu_resident_mask && uvm_page_mask_intersects(faulted_pages, cpu_resident_mask))
            return true;
    }

    // PPCIE, MPT CC (P2P access) can do remote mappings - no prefetching
    // needed
    return false;
}
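
A concrete reading of the three rules, as a table of illustrative inputs and
the resulting decision:

// preferred_location unset                        -> true   (rule 1)
// preferred == GPU0, dest == GPU0                 -> true   (rule 2)
// CC off, preferred == GPU0, dest == GPU1         -> false  (no exception)
// CC on,  preferred == GPU0, dest == CPU          -> true   (DtoH exception)
// CC on,  preferred == GPU1, dest == GPU0,
//         some faulted pages resident on the CPU  -> true   (HtoD exception)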

// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
@@ -358,13 +412,16 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
    if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
        resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency, NUMA_NO_NODE);

    // If this is a first-touch fault and the destination processor is the
    // preferred location, populate the whole max_prefetch_region.
    // - If this is a first-touch fault and the destination processor is the
    //   preferred location, populate the whole max_prefetch_region.
    // - Do not prefetch pages out of the preferred location (policy location
    //   is valid and does not match the new residency), unless confidential
    //   computing is enabled.
    if (uvm_processor_mask_empty(&va_block->resident) &&
        uvm_id_equal(new_residency, policy->preferred_location)) {
        uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
    }
    else {
    else if (should_apply_prefetch_logic(policy, new_residency, va_block, va_block_context, faulted_pages)) {
        init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, resident_mask, faulted_pages);

        update_bitmap_tree_from_va_block(bitmap_tree,

@@ -197,6 +197,8 @@ typedef struct
        unsigned max_resets;

        NvU64 pin_ns;

        NvS8 lapse_stat;
    } params;

    uvm_va_space_t *va_space;
@@ -262,13 +264,22 @@ static unsigned uvm_perf_thrashing_pin_threshold = UVM_PERF_THRASHING_PIN_THRESH

// TODO: Bug 1768615: [uvm] Automatically tune default values for thrashing
// detection/prevention parameters
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500ULL
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 800)

#define UVM_PERF_THRASHING_LAPSE_USEC_MAX (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 1000)
#define UVM_PERF_THRASHING_LAPSE_USEC_MIN (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT / 100)

// Lapse of time in microseconds that determines if two consecutive events on
// the same page can be considered thrashing
static unsigned uvm_perf_thrashing_lapse_usec = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT;

#define UVM_PERF_LAPSE_VOTE_THRESHOLD 32

// Number of lapse intervals greater than uvm_perf_thrashing_lapse_usec,
// big enough to consider readjusting.
static unsigned int uvm_perf_lapse_vote_threshold = UVM_PERF_LAPSE_VOTE_THRESHOLD;

#define UVM_PERF_THRASHING_NAP_DEFAULT 1
#define UVM_PERF_THRASHING_NAP_MAX 100

@@ -309,6 +320,7 @@ module_param(uvm_perf_thrashing_enable, uint, S_IRUGO);
module_param(uvm_perf_thrashing_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_lapse_usec, uint, S_IRUGO);
module_param(uvm_perf_lapse_vote_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_nap, uint, S_IRUGO);
module_param(uvm_perf_thrashing_epoch, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin, uint, S_IRUGO);
@@ -324,6 +336,7 @@ static bool g_uvm_perf_thrashing_enable;
static unsigned g_uvm_perf_thrashing_threshold;
static unsigned g_uvm_perf_thrashing_pin_threshold;
static NvU64 g_uvm_perf_thrashing_lapse_usec;
static unsigned g_uvm_perf_lapse_vote_threshold;
static NvU64 g_uvm_perf_thrashing_nap;
static NvU64 g_uvm_perf_thrashing_epoch;
static NvU64 g_uvm_perf_thrashing_pin;
@@ -1607,6 +1620,29 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
    return hint;
}

static void adjust_thrashing_lapse(va_space_thrashing_info_t *ti, NvU64 lapse)
{
    // If lapse is non-default, i.e. provided by user explicitly, don't adjust it
    if (g_uvm_perf_thrashing_lapse_usec != UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT)
        return;

    // Update statistics without if-else conditionals.
    ti->params.lapse_stat += 2 * !(lapse < ti->params.lapse_ns) - 1;

    // Voting capped exponential adjustment
    if (ti->params.lapse_stat >= g_uvm_perf_lapse_vote_threshold &&
        ti->params.lapse_ns < UVM_PERF_THRASHING_LAPSE_USEC_MAX * 1000)
        ti->params.lapse_ns += min(ti->params.lapse_ns / 8, UVM_PERF_THRASHING_LAPSE_USEC_MAX / 10 * 1000);
    else if (-ti->params.lapse_stat <= -(int)g_uvm_perf_lapse_vote_threshold &&
             ti->params.lapse_ns > UVM_PERF_THRASHING_LAPSE_USEC_MIN * 1000)
        ti->params.lapse_ns -= max(ti->params.lapse_ns / 8, UVM_PERF_THRASHING_LAPSE_USEC_MIN * 1000);
    else
        return;

    ti->params.lapse_stat /= 2;
}
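
The statistics update above is branchless: !(lapse < ti->params.lapse_ns)
evaluates to 1 when the observed lapse is at least the current threshold and to
0 otherwise, so the expression adds exactly +1 or -1 to lapse_stat. Worked out:

// lapse >= lapse_ns: 2 * !(0) - 1 == 2 * 1 - 1 == +1  (vote to grow lapse_ns)
// lapse <  lapse_ns: 2 * !(1) - 1 == 2 * 0 - 1 == -1  (vote to shrink it)
//
// Once enough votes accumulate, lapse_ns moves by about 1/8 (capped), and
// halving lapse_stat afterwards retains part of the observed trend.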

// Function called on fault that tells the fault handler if any operation
// should be performed to minimize thrashing. The logic is as follows:
//
@@ -1710,6 +1746,8 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,

    last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);

    adjust_thrashing_lapse(va_space_thrashing, time_stamp - last_time_stamp);

    // If the lapse since the last thrashing event is longer than a thrashing
    // lapse we are no longer thrashing
    if (time_stamp - last_time_stamp > va_space_thrashing->params.lapse_ns &&
@@ -2012,6 +2050,8 @@ NV_STATUS uvm_perf_thrashing_init(void)

    INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);

    INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_lapse_vote_threshold, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
                                         UVM_PERF_THRASHING_NAP_DEFAULT,
                                         UVM_PERF_THRASHING_NAP_MAX);

@@ -98,14 +98,16 @@
//
// When a memory allocation from PMA fails and eviction is requested, PMM will
// check whether it can evict any user memory chunks to satisfy the request.
// All allocated user memory root chunks are tracked in an LRU list
// (root_chunks.va_block_used). A root chunk is moved to the tail of that list
// whenever any of its subchunks is allocated (unpinned) by a VA block (see
// uvm_pmm_gpu_unpin_allocated()). When a root chunk is selected for eviction,
// it has the eviction flag set (see pick_root_chunk_to_evict()). This flag
// affects many of the PMM operations on all of the subchunks of the root chunk
// being evicted. See usage of (root_)chunk_is_in_eviction(), in particular in
// chunk_free_locked() and claim_free_chunk().
// All allocated user memory root chunks are tracked in one of several LRU lists
// (root_chunks.alloc_list[n]). The list used depends on the state of the chunk
// (see uvm_pmm_alloc_list_t). A root chunk is moved to the tail of the used
// list (UVM_PMM_ALLOC_LIST_USED) whenever any of its subchunks is allocated
// (unpinned) by a VA block (see uvm_pmm_gpu_unpin_allocated()). When a root
// chunk is selected for eviction, it has the eviction flag set
// (see pick_root_chunk_to_evict()). This flag affects many of the PMM
// operations on all of the subchunks of the root chunk being evicted. See usage
// of (root_)chunk_is_in_eviction(), in particular in chunk_free_locked() and
// claim_free_chunk().
//
// To evict a root chunk, all of its free subchunks are pinned, then all
// resident pages backed by it are moved to the CPU one VA block at a time.
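
The rewritten comment references uvm_pmm_alloc_list_t; a sketch consistent with
the list names and eviction order used in this change (the real declaration
lives in the PMM headers and may differ):

typedef enum
{
    // Iterated in this order by get_first_allocated_chunk(), so lower
    // values are preferred eviction victims.
    UVM_PMM_ALLOC_LIST_UNUSED,    // no resident pages backed by the chunk
    UVM_PMM_ALLOC_LIST_DISCARDED, // discarded ranges, evicted after unused
    UVM_PMM_ALLOC_LIST_USED,      // actively used, evicted last
    UVM_PMM_ALLOC_LIST_COUNT
} uvm_pmm_alloc_list_t;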
|
||||
@@ -645,7 +647,7 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk
|
||||
else if (root_chunk->chunk.state != UVM_PMM_GPU_CHUNK_STATE_FREE) {
|
||||
UVM_ASSERT(root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT ||
|
||||
root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
|
||||
list_move_tail(&root_chunk->chunk.list, &pmm->root_chunks.va_block_used);
|
||||
list_move_tail(&root_chunk->chunk.list, &pmm->root_chunks.alloc_list[UVM_PMM_ALLOC_LIST_USED]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -663,7 +665,6 @@ void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm
|
||||
UVM_ASSERT(list_empty(&chunk->list));
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(chunk->va_block == va_block);
|
||||
UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
@@ -757,12 +758,8 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
UVM_ASSERT(child->state == first_child->state);
|
||||
|
||||
if ((first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(first_child)) {
|
||||
uvm_gpu_chunk_t *prev_child = chunk->suballoc->subchunks[i-1];
|
||||
|
||||
UVM_ASSERT(child->va_block == child_va_block);
|
||||
UVM_ASSERT(child->va_block_page_index ==
|
||||
prev_child->va_block_page_index + uvm_gpu_chunk_get_size(prev_child) / PAGE_SIZE);
|
||||
UVM_ASSERT(child->is_referenced == prev_child->is_referenced);
|
||||
UVM_ASSERT(child->is_referenced == first_child->is_referenced);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -798,7 +795,6 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
UVM_ASSERT(subchunk->va_block);
|
||||
|
||||
chunk->va_block = subchunk->va_block;
|
||||
chunk->va_block_page_index = subchunk->va_block_page_index;
|
||||
chunk->is_referenced = subchunk->is_referenced;
|
||||
}
|
||||
else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
|
||||
@@ -1198,7 +1194,6 @@ void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
UVM_ASSERT(chunk->va_block);
|
||||
|
||||
chunk->va_block = NULL;
|
||||
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
chunk_pin(pmm, chunk);
|
||||
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
@@ -1412,7 +1407,7 @@ static void chunk_start_eviction(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
uvm_gpu_chunk_set_in_eviction(chunk, true);
|
||||
}
|
||||
|
||||
static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, struct list_head *list)
|
||||
static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_alloc_list_t alloc_list)
|
||||
{
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
@@ -1426,7 +1421,7 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
|
||||
// eviction lists.
|
||||
UVM_ASSERT(!list_empty(&chunk->list));
|
||||
|
||||
list_move_tail(&chunk->list, list);
|
||||
list_move_tail(&chunk->list, &pmm->root_chunks.alloc_list[alloc_list]);
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
@@ -1434,17 +1429,49 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
|
||||
|
||||
void uvm_pmm_gpu_mark_root_chunk_used(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_used);
|
||||
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_USED);
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_unused);
|
||||
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_UNUSED);
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_mark_root_chunk_discarded(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_discarded);
|
||||
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_DISCARDED);
|
||||
}
|
||||
|
||||
static uvm_pmm_alloc_list_t get_alloc_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
uvm_pmm_alloc_list_t alloc_list;
|
||||
|
||||
uvm_assert_spinlock_locked(&pmm->list_lock);
|
||||
|
||||
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
|
||||
uvm_gpu_chunk_t *entry;
|
||||
list_for_each_entry(entry, &pmm->root_chunks.alloc_list[alloc_list], list) {
|
||||
if (entry == chunk)
|
||||
return alloc_list;
|
||||
}
|
||||
}
|
||||
|
||||
return UVM_PMM_ALLOC_LIST_COUNT;
|
||||
}
|
||||
|
||||
static uvm_gpu_chunk_t *get_first_allocated_chunk(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
uvm_pmm_alloc_list_t alloc_list;
|
||||
|
||||
uvm_assert_spinlock_locked(&pmm->list_lock);
|
||||
|
||||
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
|
||||
uvm_gpu_chunk_t *chunk = list_first_chunk(&pmm->root_chunks.alloc_list[alloc_list]);
|
||||
if (chunk)
|
||||
return chunk;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
|
||||
@@ -1471,19 +1498,10 @@ static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
|
||||
UVM_ASSERT(chunk->is_zero);
|
||||
}
|
||||
|
||||
if (!chunk)
|
||||
chunk = list_first_chunk(&pmm->root_chunks.va_block_unused);
|
||||
|
||||
if (!chunk) {
|
||||
// Discarded pages are chosen to be evicted after unused pages,
|
||||
// as we expect some of them to get reverted to used pages.
|
||||
chunk = list_first_chunk(&pmm->root_chunks.va_block_discarded);
|
||||
}
|
||||
|
||||
// TODO: Bug 1765193: Move the chunks to the tail of the used list whenever
|
||||
// they get mapped.
|
||||
if (!chunk)
|
||||
chunk = list_first_chunk(&pmm->root_chunks.va_block_used);
|
||||
chunk = get_first_allocated_chunk(pmm);
|
||||
|
||||
if (chunk)
|
||||
chunk_start_eviction(pmm, chunk);
|
||||
@@ -1492,6 +1510,7 @@ static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
|
||||
|
||||
if (chunk)
|
||||
return root_chunk_from_chunk(pmm, chunk);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1867,7 +1886,6 @@ static void init_root_chunk(uvm_pmm_gpu_t *pmm,
|
||||
UVM_ASSERT(!chunk->parent);
|
||||
UVM_ASSERT(!chunk->suballoc);
|
||||
UVM_ASSERT(!chunk->va_block);
|
||||
UVM_ASSERT(chunk->va_block_page_index == PAGES_PER_UVM_VA_BLOCK);
|
||||
UVM_ASSERT(list_empty(&chunk->list));
|
||||
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);
|
||||
UVM_ASSERT(!root_chunk_has_elevated_page(pmm, root_chunk));
|
||||
@@ -2109,7 +2127,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
subchunk->type = chunk->type;
|
||||
uvm_gpu_chunk_set_size(subchunk, subchunk_size);
|
||||
subchunk->parent = chunk;
|
||||
subchunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
subchunk->is_zero = chunk->is_zero;
|
||||
INIT_LIST_HEAD(&subchunk->list);
|
||||
|
||||
@@ -2121,7 +2138,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
uvm_assert_mutex_locked(&chunk->va_block->lock);
|
||||
|
||||
subchunk->va_block = chunk->va_block;
|
||||
subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
|
||||
subchunk->is_referenced = chunk->is_referenced;
|
||||
}
|
||||
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
|
||||
@@ -2140,7 +2156,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
|
||||
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
|
||||
chunk->va_block = NULL;
|
||||
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
chunk->is_referenced = false;
|
||||
}
|
||||
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
|
||||
@@ -2154,7 +2169,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
root_chunk->chunk.suballoc->pinned_leaf_chunks += 1;
|
||||
|
||||
chunk->va_block = NULL;
|
||||
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
chunk->is_referenced = false;
|
||||
}
|
||||
|
||||
@@ -2243,7 +2257,6 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
UVM_ASSERT(chunk->va_block);
|
||||
UVM_ASSERT(list_empty(&chunk->list));
|
||||
chunk->va_block = NULL;
|
||||
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
chunk->is_zero = false;
|
||||
chunk_pin(pmm, chunk);
|
||||
}
|
||||
@@ -2261,7 +2274,6 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
}
|
||||
|
||||
chunk->va_block = NULL;
|
||||
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
chunk->is_zero = false;
|
||||
|
||||
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED)
|
||||
@@ -2913,134 +2925,6 @@ cleanup:
|
||||
return status;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Start/end of the physical region to be traversed (IN)
|
||||
NvU64 phys_start;
|
||||
NvU64 phys_end;
|
||||
|
||||
// Pointer to the array of mappins where to store results (OUT)
    uvm_reverse_map_t *mappings;

    // Number of entries written to mappings (OUT)
    NvU32 num_mappings;
} get_chunk_mappings_data_t;

// Chunk traversal function used for phys-to-virt translation. These are the
// possible return values.
//
// - NV_ERR_OUT_OF_RANGE: no allocated physical chunks were found
// - NV_ERR_MORE_DATA_AVAILABLE: allocated physical chunks were found
// - NV_OK: allocated physical chunks may have been found. Check num_mappings
static NV_STATUS get_chunk_mappings_in_range(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, void *data)
{
    uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
    get_chunk_mappings_data_t *get_chunk_mappings_data = (get_chunk_mappings_data_t *)data;
    NvU64 chunk_end = chunk->address + uvm_gpu_chunk_get_size(chunk) - 1;

    uvm_assert_mutex_locked(&pmm->lock);

    // Kernel chunks do not have assigned VA blocks so we can just skip them
    if (chunk->type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL)
        return NV_WARN_NOTHING_TO_DO;

    // This chunk is located before the requested physical range. Skip its
    // children and keep going
    if (chunk_end < get_chunk_mappings_data->phys_start)
        return NV_WARN_NOTHING_TO_DO;

    // We are beyond the search phys range. Stop traversing.
    if (chunk->address > get_chunk_mappings_data->phys_end) {
        if (get_chunk_mappings_data->num_mappings > 0)
            return NV_ERR_MORE_DATA_AVAILABLE;
        else
            return NV_ERR_OUT_OF_RANGE;
    }

    uvm_spin_lock(&pmm->list_lock);

    // Return results for allocated leaf chunks only
    if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
        uvm_reverse_map_t *reverse_map;

        UVM_ASSERT(chunk->va_block);
        uvm_va_block_retain(chunk->va_block);

        reverse_map = &get_chunk_mappings_data->mappings[get_chunk_mappings_data->num_mappings];

        reverse_map->va_block = chunk->va_block;
        reverse_map->region = uvm_va_block_region(chunk->va_block_page_index,
                                                  chunk->va_block_page_index + uvm_gpu_chunk_get_size(chunk) / PAGE_SIZE);
        reverse_map->owner = gpu->id;

        // If we land in the middle of a chunk, adjust the offset
        if (get_chunk_mappings_data->phys_start > chunk->address) {
            NvU64 offset = get_chunk_mappings_data->phys_start - chunk->address;

            reverse_map->region.first += offset / PAGE_SIZE;
        }

        // If the physical range doesn't cover the whole chunk, adjust num_pages
        if (get_chunk_mappings_data->phys_end < chunk_end)
            reverse_map->region.outer -= (chunk_end - get_chunk_mappings_data->phys_end) / PAGE_SIZE;

        ++get_chunk_mappings_data->num_mappings;
    }

    uvm_spin_unlock(&pmm->list_lock);

    return NV_OK;
}
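
The walker that drives this callback is not shown in these hunks, so, as a point of reference, here is a minimal, self-contained sketch of the pre-order traversal contract that the return codes above imply. The type and function names below are illustrative stand-ins, not driver code; in the driver, NV_WARN_NOTHING_TO_DO plays the prune role and any error status aborts the walk and is propagated to the caller.

#include <stddef.h>

typedef enum {
    WALK_OK,            // visit succeeded: descend into the children
    WALK_SKIP_SUBTREE,  // prune: skip this chunk's children, keep walking
    WALK_STOP           // abort the whole traversal and report this status
} walk_status_t;

typedef struct chunk_node {
    struct chunk_node *children;
    size_t num_children;
} chunk_node_t;

typedef walk_status_t (*walk_cb_t)(chunk_node_t *chunk, void *data);

// Pre-order walk: the parent is visited before any of its children, matching
// how larger chunks are visited before their suballocated subchunks.
static walk_status_t walk_pre_order(chunk_node_t *chunk, walk_cb_t cb, void *data)
{
    walk_status_t status = cb(chunk, data);
    size_t i;

    if (status == WALK_SKIP_SUBTREE)
        return WALK_OK;

    if (status != WALK_OK)
        return status;

    for (i = 0; i < chunk->num_children; i++) {
        status = walk_pre_order(&chunk->children[i], cb, data);
        if (status != WALK_OK)
            return status;
    }

    return WALK_OK;
}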

NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings)
{
    NvU64 chunk_base_addr = UVM_ALIGN_DOWN(phys_addr, UVM_CHUNK_SIZE_MAX);
    NvU64 size_in_chunk = min(UVM_CHUNK_SIZE_MAX - (phys_addr - chunk_base_addr), region_size);
    NvU32 num_mappings = 0;

    UVM_ASSERT(PAGE_ALIGNED(phys_addr));
    UVM_ASSERT(PAGE_ALIGNED(region_size));

    uvm_mutex_lock(&pmm->lock);

    // Traverse the whole requested region
    do {
        NV_STATUS status = NV_OK;
        uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_address(pmm, phys_addr);
        uvm_gpu_chunk_t *chunk = &root_chunk->chunk;
        get_chunk_mappings_data_t get_chunk_mappings_data;

        get_chunk_mappings_data.phys_start = phys_addr;
        get_chunk_mappings_data.phys_end = phys_addr + size_in_chunk - 1;
        get_chunk_mappings_data.mappings = out_mappings + num_mappings;
        get_chunk_mappings_data.num_mappings = 0;

        // Walk the chunks for the current root chunk
        status = chunk_walk_pre_order(pmm,
                                      chunk,
                                      get_chunk_mappings_in_range,
                                      &get_chunk_mappings_data);
        if (status == NV_ERR_OUT_OF_RANGE)
            break;

        if (get_chunk_mappings_data.num_mappings > 0) {
            UVM_ASSERT(status == NV_OK || status == NV_ERR_MORE_DATA_AVAILABLE);
            num_mappings += get_chunk_mappings_data.num_mappings;
        }
        else {
            UVM_ASSERT(status == NV_OK);
        }

        region_size -= size_in_chunk;
        phys_addr += size_in_chunk;
        size_in_chunk = min((NvU64)UVM_CHUNK_SIZE_MAX, region_size);
    } while (region_size > 0);

    uvm_mutex_unlock(&pmm->lock);

    return num_mappings;
}

#if UVM_IS_CONFIG_HMM()

uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page)
@@ -3139,7 +3023,6 @@ static void devmem_page_free(struct page *page)
    UVM_ASSERT(chunk->is_referenced);

    chunk->va_block = NULL;
    chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
    chunk->is_referenced = false;

    if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
@@ -3477,12 +3360,10 @@ void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)

void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
    unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
                                                     uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
    struct page *p2p_page;

    if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) {
        p2p_page = pfn_to_page(pci_start_pfn);
        struct page *p2p_page = pfn_to_page(pci_resource_start(parent_gpu->pci_dev,
                                                               uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT);

        devm_memunmap_pages(&parent_gpu->pci_dev->dev, page_pgmap(p2p_page));
    }

@@ -3546,6 +3427,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
        { 0, uvm_mem_kernel_chunk_sizes(gpu)},
    };
    NV_STATUS status = NV_OK;
    uvm_pmm_alloc_list_t alloc_list;
    size_t i, j, k;

    // UVM_CHUNK_SIZE_INVALID is UVM_CHUNK_SIZE_MAX shifted left by 1. This
@@ -3560,10 +3442,11 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
                INIT_LIST_HEAD(&pmm->free_list[i][j][k]);
        }
    }
    INIT_LIST_HEAD(&pmm->root_chunks.va_block_used);
    INIT_LIST_HEAD(&pmm->root_chunks.va_block_unused);

    for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++)
        INIT_LIST_HEAD(&pmm->root_chunks.alloc_list[alloc_list]);

    INIT_LIST_HEAD(&pmm->root_chunks.va_block_lazy_free);
    INIT_LIST_HEAD(&pmm->root_chunks.va_block_discarded);
    nv_kthread_q_item_init(&pmm->root_chunks.va_block_lazy_free_q_item, process_lazy_free_entry, pmm);

    uvm_mutex_init(&pmm->lock, UVM_LOCK_ORDER_PMM);
@@ -3620,7 +3503,6 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
        chunk->state = UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED;
        uvm_gpu_chunk_set_size(chunk, UVM_CHUNK_SIZE_MAX);
        chunk->address = i * UVM_CHUNK_SIZE_MAX;
        chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
    }

    status = uvm_bit_locks_init(&pmm->root_chunks.bitlocks, pmm->root_chunks.count, UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
@@ -4073,3 +3955,55 @@ NV_STATUS uvm_test_pmm_query_pma_stats(UVM_TEST_PMM_QUERY_PMA_STATS_PARAMS *para
    uvm_gpu_release(gpu);
    return NV_OK;
}

NV_STATUS uvm_test_pmm_get_alloc_list(UVM_TEST_PMM_GET_ALLOC_LIST_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_gpu_t *gpu;
    uvm_va_block_t *va_block;
    uvm_gpu_chunk_t *chunk;
    NV_STATUS status = NV_OK;

    // -Wall implies -Wenum-compare, so cast through int to avoid warnings
    BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_UNUSED != (int)UVM_PMM_ALLOC_LIST_UNUSED);
    BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_DISCARDED != (int)UVM_PMM_ALLOC_LIST_DISCARDED);
    BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_USED != (int)UVM_PMM_ALLOC_LIST_USED);
    BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_COUNT != (int)UVM_PMM_ALLOC_LIST_COUNT);

    uvm_va_space_down_read(va_space);

    gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu) {
        status = NV_ERR_INVALID_DEVICE;
        goto out;
    }

    status = uvm_va_block_find(va_space, params->address, &va_block);
    if (status != NV_OK)
        goto out;

    // No chunk or chunk not on an alloc list
    status = NV_ERR_INVALID_STATE;

    uvm_mutex_lock(&va_block->lock);

    chunk = uvm_va_block_lookup_gpu_chunk(va_block, gpu, params->address);
    if (chunk) {
        uvm_pmm_alloc_list_t alloc_list;

        uvm_spin_lock(&gpu->pmm.list_lock);
        alloc_list = get_alloc_list(&gpu->pmm, chunk);
        uvm_spin_unlock(&gpu->pmm.list_lock);

        if (alloc_list != UVM_PMM_ALLOC_LIST_COUNT) {
            params->list_type = alloc_list;
            status = NV_OK;
        }
    }

    uvm_mutex_unlock(&va_block->lock);

out:
    uvm_va_space_up_read(va_space);
    return status;
}

@@ -174,6 +174,34 @@ static void uvm_pmm_list_zero_checks(void)
    BUILD_BUG_ON(UVM_PMM_LIST_ZERO_COUNT > 2);
}

// Lists for allocated root chunks. When picking a root chunk to evict, lists
// with lower numerical order are checked first.
typedef enum
{
    // Root chunks unused by VA blocks, i.e. allocated, but not holding any
    // resident pages. These take priority when evicting as no data needs to be
    // migrated for them to be evicted.
    //
    // For simplicity, the list is approximate, tracking unused chunks only from
    // root chunk sized (2M) VA blocks.
    //
    // Updated by the VA block code with uvm_pmm_gpu_mark_root_chunk_(un)used().
    UVM_PMM_ALLOC_LIST_UNUSED,

    // Discarded root GPU chunks, which are still resident on the GPU. Chunks on
    // this list are evicted with a lower priority than unused chunks because we
    // expect some of them to get reverted to used pages.
    //
    // Updated by the VA block code with
    // uvm_pmm_gpu_mark_root_chunk_discarded().
    UVM_PMM_ALLOC_LIST_DISCARDED,

    // Root chunks used by VA blocks, likely with resident pages.
    UVM_PMM_ALLOC_LIST_USED,

    UVM_PMM_ALLOC_LIST_COUNT
} uvm_pmm_alloc_list_t;
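
To make the ordering concrete, below is a sketch of an eviction scan that honors the enum order. The real consumer is pick_root_chunk_to_evict() via get_first_allocated_chunk(); the helper here is only an illustrative stand-in built on the kernel list API, assuming the appropriate list lock is already held.

// Check the lists in ascending enum order: UNUSED first, then DISCARDED,
// then USED. Return the first chunk found, or NULL if every list is empty.
static uvm_gpu_chunk_t *pick_first_by_priority(uvm_pmm_gpu_t *pmm)
{
    uvm_pmm_alloc_list_t alloc_list;

    for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
        struct list_head *head = &pmm->root_chunks.alloc_list[alloc_list];

        if (!list_empty(head))
            return list_first_entry(head, uvm_gpu_chunk_t, list);
    }

    return NULL;
}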

// Maximum chunk sizes per type of allocation in a single GPU.
// The worst case today is Maxwell with 4 allocation sizes for page tables and
// 2 page sizes used by uvm_mem_t. Notably one of the allocations for page
@@ -277,7 +305,7 @@ struct uvm_gpu_chunk_struct

    size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);

    // Start page index within va_block
    // Start page index within va_block.
    uvm_page_index_t va_block_page_index : order_base_2(PAGES_PER_UVM_VA_BLOCK + 1);

    // This allows determining what PMM owns the chunk. Users of this field
@@ -348,30 +376,12 @@ typedef struct uvm_pmm_gpu_struct
        // Bit locks for the root chunks with 1 bit per each root chunk
        uvm_bit_locks_t bitlocks;

        // List of root chunks unused by VA blocks, i.e. allocated, but not
        // holding any resident pages. These take priority when evicting as no
        // data needs to be migrated for them to be evicted.
        //
        // For simplicity, the list is approximate, tracking unused chunks only
        // from root chunk sized (2M) VA blocks.
        //
        // Updated by the VA block code with
        // uvm_pmm_gpu_mark_root_chunk_(un)used().
        struct list_head va_block_unused;

        // List of discarded root GPU chunks, which are still mapped on the GPU.
        // Chunks on this list are evicted with a lower priority than unused chunks.
        //
        // Updated by the VA block code with
        // uvm_pmm_gpu_mark_root_chunk_discarded().
        struct list_head va_block_discarded;

        // List of root chunks used by VA blocks
        struct list_head va_block_used;
        // LRU lists for picking which root chunks to evict
        struct list_head alloc_list[UVM_PMM_ALLOC_LIST_COUNT];

        // List of chunks needing to be lazily freed and a queue for processing
        // the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t
        // or workqueue.
        // the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or
        // workqueue.
        struct list_head va_block_lazy_free;
        nv_kthread_q_item_t va_block_lazy_free_q_item;
    } root_chunks;
@@ -612,21 +622,6 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
    return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}

// Obtain the {va_block, virt_addr} information for the chunks in the given
// [phys_addr:phys_addr + region_size) range. One entry per chunk is returned.
// phys_addr and region_size must be page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). The caller is required to provide enough entries in out_mappings for
// the whole region. The function returns the number of entries written to
// out_mappings.
//
// The returned reverse map is a snapshot: it is stale as soon as it is
// returned, and the caller is responsible for locking the VA block(s) and
// checking that the chunks are still there. Also, the VA block(s) are
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
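
A minimal caller sketch of this contract (illustrative only; the array size and the revalidation step are assumptions made for the example): each returned entry carries a retained VA block, so the caller locks the block before trusting the snapshot and releases every block it was handed.

static void consume_reverse_map_example(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size)
{
    // Sized for the example; the caller must provide enough entries to cover
    // one mapping per chunk over the whole region.
    uvm_reverse_map_t mappings[32];
    NvU32 num_mappings = uvm_pmm_gpu_phys_to_virt(pmm, phys_addr, region_size, mappings);
    NvU32 i;

    for (i = 0; i < num_mappings; i++) {
        uvm_va_block_t *va_block = mappings[i].va_block;

        uvm_mutex_lock(&va_block->lock);
        // The snapshot may already be stale: re-check here that the chunk
        // still backs mappings[i].region before acting on the translation.
        uvm_mutex_unlock(&va_block->lock);

        // Drop the reference the translation took on the caller's behalf.
        uvm_va_block_release(va_block);
    }
}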

// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes) \
    for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \
@@ -653,4 +648,6 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
    for (; (__size) != UVM_CHUNK_SIZE_INVALID; \
         (__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))
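
For reference, typical usage of the iterator looks like the hypothetical snippet below; the macro body above is cut off by the hunk, but the iteration pattern is the standard one.

static void print_chunk_sizes_example(uvm_chunk_sizes_mask_t chunk_sizes)
{
    uvm_chunk_size_t size;

    // Visits each size present in the mask, smallest to largest
    for_each_chunk_size(size, chunk_sizes)
        printk(KERN_INFO "supported chunk size: %llu\n", (unsigned long long)size);
}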

NV_STATUS uvm_test_pmm_get_alloc_list(UVM_TEST_PMM_GET_ALLOC_LIST_PARAMS *params, struct file *filp);

#endif

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -1009,166 +1009,6 @@ NV_STATUS uvm_test_pmm_async_alloc(UVM_TEST_PMM_ASYNC_ALLOC_PARAMS *params, stru
    return status;
}

static uvm_reverse_map_t g_reverse_map_entries[PAGES_PER_UVM_VA_BLOCK * 4];

static NV_STATUS test_pmm_reverse_map_single(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr)
{
    NV_STATUS status = NV_OK;
    NvU32 num_translations;
    uvm_va_block_t *va_block;
    uvm_gpu_phys_address_t phys_addr;
    bool is_resident;

    status = uvm_va_block_find(va_space, addr, &va_block);
    if (status != NV_OK)
        return status;

    TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);

    // Verify that all pages are populated on the GPU
    uvm_mutex_lock(&va_block->lock);

    is_resident = uvm_processor_mask_test(&va_block->resident, gpu->id) &&
                  uvm_page_mask_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE));
    if (is_resident)
        phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);

    uvm_mutex_unlock(&va_block->lock);

    TEST_CHECK_RET(is_resident);

    // In this test a single VA range covers the whole 2MB physical region. We
    // expect a single translation to be returned for a 2MB chunk.
    num_translations = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, UVM_VA_BLOCK_SIZE, g_reverse_map_entries);
    TEST_CHECK_RET(num_translations == 1);
    TEST_CHECK_RET(g_reverse_map_entries[0].va_block == va_block);
    TEST_CHECK_RET(g_reverse_map_entries[0].region.first == 0);
    TEST_CHECK_RET(uvm_va_block_region_num_pages(g_reverse_map_entries[0].region) == uvm_va_block_num_cpu_pages(va_block));

    uvm_va_block_release(va_block);

    return NV_OK;
}

static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr, NvU64 size)
{
    uvm_va_range_managed_t *managed_range;
    uvm_va_block_t *va_block = NULL;
    uvm_va_block_context_t *va_block_context = NULL;
    NvU32 num_blocks;
    NvU32 index = 0;
    uvm_gpu_phys_address_t phys_addr = {0};
    bool is_resident;

    // In this test, the [addr:addr + size) VA region contains
    // several managed ranges with different sizes.

    // Find the first block to compute the base physical address of the root
    // chunk
    uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
        va_block = uvm_va_range_block(managed_range, 0);
        if (va_block)
            break;
    }
    TEST_CHECK_RET(va_block);

    va_block_context = uvm_va_block_context_alloc(NULL);
    TEST_CHECK_RET(va_block_context);

    uvm_mutex_lock(&va_block->lock);

    is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
    if (is_resident) {
        phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
        phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
    }

    uvm_mutex_unlock(&va_block->lock);

    uvm_va_block_context_free(va_block_context);

    TEST_CHECK_RET(is_resident);

    // Perform the lookup for the whole root chunk
    num_blocks = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, size, g_reverse_map_entries);
    TEST_CHECK_RET(num_blocks != 0);

    // Iterate over all managed ranges and their VA blocks within the 2MB VA
    // region. Some blocks are not populated. However, we assume that blocks
    // have been populated in order so they have been assigned physical
    // addresses incrementally. Therefore, the reverse translations will show
    // them in order.
    uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
        for_each_va_block_in_va_range(managed_range, va_block) {
            NvU32 num_va_block_pages = 0;

            // Iterate over all the translations for the current VA block. One
            // translation per chunk is returned. We compute the total number of
            // pages covered in the translations to check that they match with
            // the number of pages in the VA block.
            while (g_reverse_map_entries[index].va_block == va_block) {
                uvm_reverse_map_t *reverse_mapping;

                reverse_mapping = &g_reverse_map_entries[index];

                uvm_va_block_release(va_block);
                num_va_block_pages += uvm_va_block_region_num_pages(reverse_mapping->region);
                UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_mapping)));
                UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_mapping)));

                uvm_mutex_lock(&va_block->lock);

                // Verify that all pages are populated on the GPU
                is_resident = uvm_page_mask_region_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE),
                                                        reverse_mapping->region);

                uvm_mutex_unlock(&va_block->lock);

                TEST_CHECK_RET(is_resident);

                ++index;
            }

            if (num_va_block_pages)
                TEST_CHECK_RET(num_va_block_pages == uvm_va_block_num_cpu_pages(va_block));
        }
    }
    TEST_CHECK_RET(index == num_blocks);

    return NV_OK;
}

NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;
    uvm_va_space_t *va_space;

    va_space = uvm_va_space_get(filp);

    // Take the global lock to avoid interference between different instances of
    // the test, since we use global variables
    uvm_mutex_lock(&g_uvm_global.global_lock);
    uvm_va_space_down_write(va_space);

    gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu || !uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
        status = NV_ERR_INVALID_DEVICE;
        goto exit_unlock;
    }

    status = test_pmm_reverse_map_single(gpu, va_space, params->range_address1);

    if (status == NV_OK)
        status = test_pmm_reverse_map_many_blocks(gpu, va_space, params->range_address2, params->range_size2);

exit_unlock:
    uvm_va_space_up_write(va_space);
    uvm_mutex_unlock(&g_uvm_global.global_lock);

    return status;
}

static NV_STATUS test_chunk_with_elevated_page(uvm_gpu_t *gpu)
{
    uvm_pmm_gpu_t *pmm = &gpu->pmm;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2018 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -35,17 +35,22 @@
#if defined(CONFIG_PROC_FS)
// This parameter enables additional debug procfs entries. It's enabled by
// default for debug and develop builds and disabled for release builds.
int uvm_enable_debug_procfs = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
static int uvm_enable_debug_procfs = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
module_param(uvm_enable_debug_procfs, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_debug_procfs, "Enable debug procfs entries in /proc/" UVM_PROC_DIR_NAME);
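
As a usage note (standard kernel module-parameter behavior, not something this change adds): the flag can be set at load time with, e.g., `modprobe nvidia-uvm uvm_enable_debug_procfs=1`, and since it is registered with S_IRUGO the current value should be world-readable under `/sys/module/nvidia_uvm/parameters/`.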
#else
int uvm_enable_debug_procfs = 0;
static int uvm_enable_debug_procfs = 0;
#endif

static struct proc_dir_entry *uvm_proc_dir;
static struct proc_dir_entry *uvm_proc_gpus;
static struct proc_dir_entry *uvm_proc_cpu;

bool uvm_procfs_is_debug_enabled(void)
{
    return uvm_enable_debug_procfs != 0;
}

NV_STATUS uvm_procfs_init(void)
{
    if (!uvm_procfs_is_enabled())
@@ -80,4 +85,3 @@ struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void)
{
    return uvm_proc_cpu;
}


@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2019 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -45,10 +45,7 @@ static bool uvm_procfs_is_enabled(void)

// Is debug procfs enabled? This indicates that debug procfs files should be
// created.
static bool uvm_procfs_is_debug_enabled(void)
{
    return uvm_enable_debug_procfs != 0;
}
bool uvm_procfs_is_debug_enabled(void);

struct proc_dir_entry *uvm_procfs_get_gpu_base_dir(void);
struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void);
@@ -71,8 +68,8 @@ void uvm_procfs_close_callback(void);

// Defer PM lock acquisition until the respective read() callback
// is invoked, to ensure the lock is acquired and released by
// the same thread. Else the lock tracking validation code must
// be disabled for this lock, which is undesirable. As a result,
// the same thread. Else the lock tracking validation code must
// be disabled for this lock, which is undesirable. As a result,
// lockless macro is used below. See bug 2594854 for additional
// information.
#define UVM_DEFINE_SINGLE_PROCFS_FILE(name) \
