Compare commits

...

2 Commits

Author              SHA1        Message     Date
Bernhard Stoeckner  ef65a13097  535.288.01  2026-01-13 18:04:57 +01:00
Maneet Singh        66ab8e8596  535.274.02  2025-09-30 12:40:20 -07:00
31 changed files with 361 additions and 149 deletions

View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.261.03.
version 535.288.01.
## How to Build
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.261.03 driver release. This can be achieved by installing
535.288.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@@ -180,7 +180,7 @@ software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.261.03 release,
(see the table below). However, in the 535.288.01 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.261.03/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.288.01/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI

View File

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"535.261.03\"
ccflags-y += -DNV_VERSION_STRING=\"535.288.01\"
ifneq ($(SYSSRCHOST1X),)
ccflags-y += -I$(SYSSRCHOST1X)

View File

@@ -870,9 +870,9 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#define NV_PRINT_AT(nv_debug_level,at) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %lld, flags = 0x%08x, " \
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
at->num_pages, (long long)atomic64_read(&at->usage_count), \
at->flags, at->page_table); \
}
@@ -1196,7 +1196,7 @@ typedef struct nvidia_pte_s {
typedef struct nv_alloc_s {
struct nv_alloc_s *next;
struct device *dev;
atomic_t usage_count;
atomic64_t usage_count;
struct {
NvBool contig : 1;
NvBool guest : 1;
@@ -1493,7 +1493,8 @@ typedef struct
typedef struct nv_linux_state_s {
nv_state_t nv_state;
atomic_t usage_count;
atomic64_t usage_count;
NvU32 suspend_count;
struct device *dev;
@@ -1832,9 +1833,9 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
{
NV_PRINT_AT(NV_DBG_MEMINFO, at);
if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
if (atomic64_dec_and_test(&at->usage_count))
{
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
at->next = nvlfp->free_list;
nvlfp->free_list = at;
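The usage_count widening from atomic_t to atomic64_t is mechanical but touches every accessor, and it is presumably why a later hunk drops the 32-bit NV_S32_MAX guard from nv_open_device. A minimal kernel-style sketch of the pattern (a hypothetical reduced struct, not the driver's real type), including the matching printf format change from the NV_PRINT_AT hunk above:

#include <linux/atomic.h>
#include <linux/printk.h>

/* Hypothetical stand-in for nv_alloc_s: the 32-bit atomic_t counter
 * becomes atomic64_t so large reference counts cannot wrap. */
struct demo_alloc {
    atomic64_t usage_count;
};

static void demo(struct demo_alloc *at)
{
    atomic64_set(&at->usage_count, 0);
    atomic64_inc(&at->usage_count);

    /* atomic64_read() returns s64, so the format specifier moves from
     * %d to %lld with an explicit cast, exactly as in NV_PRINT_AT. */
    pr_info("count = %lld\n", (long long)atomic64_read(&at->usage_count));

    if (atomic64_dec_and_test(&at->usage_count))
        pr_info("last reference dropped\n");
}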

View File

@@ -35,17 +35,6 @@
#include <linux/sched/signal.h> /* signal_pending for kernels >= 4.11 */
#endif
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
typedef raw_spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) raw_spin_lock_irq(lock)
#define NV_SPIN_UNLOCK_IRQ(lock) raw_spin_unlock_irq(lock)
#define NV_SPIN_LOCK_IRQSAVE(lock,flags) raw_spin_lock_irqsave(lock,flags)
#define NV_SPIN_UNLOCK_IRQRESTORE(lock,flags) raw_spin_unlock_irqrestore(lock,flags)
#define NV_SPIN_LOCK(lock) raw_spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) raw_spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
#else
typedef spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK_INIT(lock) spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) spin_lock_irq(lock)
@@ -55,7 +44,6 @@ typedef spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK(lock) spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
#endif
#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)

View File

@@ -4041,6 +4041,43 @@ compile_test() {
fi
;;
drm_fb_create_takes_format_info)
#
# Determine if a `struct drm_format_info *` is passed into
# the .fb_create callback. If so, it will have 4 arguments.
# This parameter was added in commit 81112eaac559 ("drm:
# Pass the format info to .fb_create") in linux-next
# (2025-07-16)
CODE="
#include <drm/drm_mode_config.h>
#include <drm/drm_framebuffer.h>
static const struct drm_mode_config_funcs funcs;
void conftest_drm_fb_create_takes_format_info(void) {
funcs.fb_create(NULL, NULL, NULL, NULL);
}"
compile_check_conftest "$CODE" "NV_DRM_FB_CREATE_TAKES_FORMAT_INFO" "" "types"
;;
drm_fill_fb_struct_takes_format_info)
#
# Determine if a `struct drm_format_info *` is passed into
# drm_helper_mode_fill_fb_struct(). If so, it will have 4 arguments.
# This parameter was added in commit a34cc7bf1034 ("drm:
# Allow the caller to pass in the format info to
# drm_helper_mode_fill_fb_struct()") in linux-next
# (2025-07-16)
CODE="
#include <drm/drm_modeset_helper.h>
void conftest_drm_fill_fb_struct_takes_format_info(void) {
drm_helper_mode_fill_fb_struct(NULL, NULL, NULL, NULL);
}"
compile_check_conftest "$CODE" "NV_DRM_FILL_FB_STRUCT_TAKES_FORMAT_INFO" "" "types"
;;
drm_connector_funcs_have_mode_in_name)
#
# Determine if _mode_ is present in connector function names. We

View File

@@ -154,11 +154,14 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
static struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
const struct drm_mode_fb_cmd2 *cmd
#else
#else
struct drm_mode_fb_cmd2 *cmd
#endif
#endif
)
{
struct drm_mode_fb_cmd2 local_cmd;
@@ -169,11 +172,14 @@ static struct drm_framebuffer *nv_drm_framebuffer_create(
fb = nv_drm_internal_framebuffer_create(
dev,
file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
info,
#endif
&local_cmd);
#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
*cmd = local_cmd;
#endif
#endif
return fb;
}

View File

@@ -206,6 +206,9 @@ fail:
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
struct drm_mode_fb_cmd2 *cmd)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
@@ -259,6 +262,9 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
dev,
#endif
&nv_fb->base,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
info,
#endif
cmd);
/*

View File

@@ -59,6 +59,9 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer(
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
struct drm_mode_fb_cmd2 *cmd);
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

View File

@@ -451,6 +451,13 @@ int nv_drm_atomic_commit(struct drm_device *dev,
#else
drm_atomic_helper_swap_state(dev, state);
#endif
/*
* Used to update legacy modeset state pointers to support UAPIs not updated
* by the core atomic modeset infrastructure.
*
* Example: /sys/class/drm/<card connector>/enabled
*/
drm_atomic_helper_update_legacy_modeset_state(dev, state);
/*
* nv_drm_atomic_commit_internal() must not return failure after

View File

@@ -139,3 +139,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_fb_create_takes_format_info

View File

@@ -11519,6 +11519,11 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
return NV_ERR_NO_MEMORY;
}
if (uvm_va_block_is_hmm(va_block)) {
memset(block_context->hmm.src_pfns, 0, sizeof(block_context->hmm.src_pfns));
memset(block_context->hmm.dst_pfns, 0, sizeof(block_context->hmm.dst_pfns));
}
pages_to_evict = &block_context->caller_page_mask;
uvm_page_mask_zero(pages_to_evict);
chunk_region.outer = 0;

View File

@@ -42,9 +42,12 @@ MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER);
* DMA_BUF namespace is added by commit id 16b0314aa746
* ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in 5.16
*/
#if defined(NV_MODULE_IMPORT_NS_TAKES_CONSTANT)
MODULE_IMPORT_NS(DMA_BUF);
#endif
#else
MODULE_IMPORT_NS("DMA_BUF");
#endif // defined(NV_MODULE_IMPORT_NS_TAKES_CONSTANT)
#endif // defined(MODULE_IMPORT_NS)
static NvU32 nv_num_instances;
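Upstream changed MODULE_IMPORT_NS() from taking a bare token to taking a string literal (around v6.13), so the driver now dispatches on NV_MODULE_IMPORT_NS_TAKES_CONSTANT. A hypothetical conftest-style probe for that macro — a sketch of how such a check could look, not the driver's actual test, which lives in conftest.sh:

#include <linux/module.h>

/* If MODULE_IMPORT_NS() still accepts a bare token, this compiles and
 * the constant form is used; on string-literal kernels, DMA_BUF is an
 * undeclared identifier, the probe fails to build, and the
 * MODULE_IMPORT_NS("DMA_BUF") branch is selected instead. */
MODULE_IMPORT_NS(DMA_BUF);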

View File

@@ -72,7 +72,7 @@ nvidia_vma_open(struct vm_area_struct *vma)
if (at != NULL)
{
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
NV_PRINT_AT(NV_DBG_MEMINFO, at);
}
@@ -404,7 +404,7 @@ static int nvidia_mmap_sysmem(
int ret = 0;
unsigned long start = 0;
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
start = vma->vm_start;
for (j = page_index; j < (page_index + pages); j++)
@@ -436,7 +436,7 @@ static int nvidia_mmap_sysmem(
if (ret)
{
NV_ATOMIC_DEC(at->usage_count);
atomic64_dec(&at->usage_count);
return -EAGAIN;
}
start += PAGE_SIZE;

View File

@@ -798,7 +798,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
* For eGPU, fall off the bus along with clients active is a valid scenario.
* Hence skipping the sanity check for eGPU.
*/
if ((NV_ATOMIC_READ(nvl->usage_count) != 0) && !(nv->is_external_gpu))
if ((atomic64_read(&nvl->usage_count) != 0) && !(nv->is_external_gpu))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count!\n",
@@ -809,7 +809,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
* We can't return from this function without corrupting state, so we wait for
* the usage count to go to zero.
*/
while (NV_ATOMIC_READ(nvl->usage_count) != 0)
while (atomic64_read(&nvl->usage_count) != 0)
{
/*
@@ -865,7 +865,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
#endif
/* Update the frontend data structures */
if (NV_ATOMIC_READ(nvl->usage_count) == 0)
if (atomic64_read(&nvl->usage_count) == 0)
{
nvidia_frontend_remove_device((void *)&nv_fops, nvl);
}
@@ -890,7 +890,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
nv_unregister_ibmnpu_devices(nv);
nv_destroy_ibmnpu_info(nv);
if (NV_ATOMIC_READ(nvl->usage_count) == 0)
if (atomic64_read(&nvl->usage_count) == 0)
{
nv_lock_destroy_locks(sp, nv);
}
@@ -906,7 +906,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
num_nv_devices--;
if (NV_ATOMIC_READ(nvl->usage_count) == 0)
if (atomic64_read(&nvl->usage_count) == 0)
{
NV_PCI_DISABLE_DEVICE(pci_dev);
NV_KFREE(nvl, sizeof(nv_linux_state_t));

View File

@@ -889,7 +889,7 @@ nv_procfs_close_unbind_lock(
down(&nvl->ldata_lock);
if ((value == 1) && !(nv->flags & NV_FLAG_UNBIND_LOCK))
{
if (NV_ATOMIC_READ(nvl->usage_count) == 0)
if (atomic64_read(&nvl->usage_count) == 0)
rm_unbind_lock(sp, nv);
if (nv->flags & NV_FLAG_UNBIND_LOCK)

View File

@@ -167,7 +167,7 @@ NvBool nv_get_rsync_relaxed_ordering_mode(
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
/* shouldn't be called without opening a device */
WARN_ON(NV_ATOMIC_READ(nvl->usage_count) == 0);
WARN_ON(atomic64_read(&nvl->usage_count) == 0);
/*
* g_rsync_info.relaxed_ordering_mode can be safely accessed outside of
@@ -185,7 +185,7 @@ void nv_wait_for_rsync(
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
/* shouldn't be called without opening a device */
WARN_ON(NV_ATOMIC_READ(nvl->usage_count) == 0);
WARN_ON(atomic64_read(&nvl->usage_count) == 0);
/*
* g_rsync_info.relaxed_ordering_mode can be safely accessed outside of

View File

@@ -311,7 +311,7 @@ nv_alloc_t *nvos_create_alloc(
}
memset(at->page_table, 0, pt_size);
NV_ATOMIC_SET(at->usage_count, 0);
atomic64_set(&at->usage_count, 0);
for (i = 0; i < at->num_pages; i++)
{
@@ -341,7 +341,7 @@ int nvos_free_alloc(
if (at == NULL)
return -1;
if (NV_ATOMIC_READ(at->usage_count))
if (atomic64_read(&at->usage_count))
return 1;
for (i = 0; i < at->num_pages; i++)
@@ -1283,16 +1283,16 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
{
rc = os_alloc_mutex(&nvl->isr_bh_unlocked_mutex);
if (rc != 0)
goto failed;
goto failed_release_irq;
nv_kthread_q_item_init(&nvl->bottom_half_q_item, nvidia_isr_bh_unlocked, (void *)nv);
rc = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name);
if (rc != 0)
goto failed;
goto failed_release_irq;
kthread_init = NV_TRUE;
rc = nv_kthread_q_init(&nvl->queue.nvk, "nv_queue");
if (rc)
goto failed;
goto failed_release_irq;
nv->queue = &nvl->queue;
if (nv_platform_use_auto_online(nvl))
@@ -1300,33 +1300,18 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
rc = nv_kthread_q_init(&nvl->remove_numa_memory_q,
"nv_remove_numa_memory");
if (rc)
goto failed;
goto failed_release_irq;
remove_numa_memory_kthread_init = NV_TRUE;
}
}
if (!rm_init_adapter(sp, nv))
{
if (!(nv->flags & NV_FLAG_USES_MSIX) &&
!(nv->flags & NV_FLAG_SOC_DISPLAY) &&
!(nv->flags & NV_FLAG_SOC_IGPU))
{
free_irq(nv->interrupt_line, (void *) nvl);
}
else if (nv->flags & NV_FLAG_SOC_DISPLAY)
{
}
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
else
{
nv_free_msix_irq(nvl);
}
#endif
NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
"rm_init_adapter failed, device minor number %d\n",
nvl->minor_num);
rc = -EIO;
goto failed;
goto failed_release_irq;
}
{
@@ -1360,6 +1345,26 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
return 0;
failed_release_irq:
if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
{
if (!(nv->flags & NV_FLAG_USES_MSIX) &&
!(nv->flags & NV_FLAG_SOC_DISPLAY) &&
!(nv->flags & NV_FLAG_SOC_IGPU))
{
free_irq(nv->interrupt_line, (void *) nvl);
}
else if (nv->flags & NV_FLAG_SOC_DISPLAY)
{
}
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
else
{
nv_free_msix_irq(nvl);
}
#endif
}
failed:
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
if (nv->flags & NV_FLAG_USES_MSI)
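The reorganized error path consolidates the IRQ teardown that the old code duplicated inside the rm_init_adapter failure branch: every failure after the IRQ is requested now jumps to failed_release_irq, which frees it exactly once and falls through to the common failed label. A standalone sketch of the two-label unwind ordering (hypothetical names, plain C for illustration only):

#include <stdio.h>

/* Hypothetical stand-ins for the driver's setup steps. */
static int acquire_irq(void)     { puts("irq acquired"); return 0; }
static void release_irq(void)    { puts("irq released"); }
static int init_queues(int fail) { return fail ? -1 : 0; }

static int start_device(int fail_queues)
{
    int rc = acquire_irq();
    if (rc)
        goto failed;                /* nothing to unwind yet */

    rc = init_queues(fail_queues);
    if (rc)
        goto failed_release_irq;    /* IRQ now needs releasing */

    return 0;

failed_release_irq:
    release_irq();                  /* runs once, then falls through */
failed:
    puts("common cleanup");
    return rc;
}

int main(void)
{
    start_device(1);
    return 0;
}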
@@ -1450,13 +1455,10 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
return -ENODEV;
}
if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
return -EMFILE;
if ( ! (nv->flags & NV_FLAG_OPEN))
{
/* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */
if (NV_ATOMIC_READ(nvl->usage_count) != 0)
if (atomic64_read(&nvl->usage_count) != 0)
{
NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
"Minor device %u is referenced without being open!\n",
@@ -1476,7 +1478,8 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
return -EBUSY;
}
NV_ATOMIC_INC(nvl->usage_count);
atomic64_inc(&nvl->usage_count);
return 0;
}
@@ -1775,7 +1778,7 @@ static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp)
{
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
if (NV_ATOMIC_READ(nvl->usage_count) == 0)
if (atomic64_read(&nvl->usage_count) == 0)
{
nv_printf(NV_DBG_ERRORS,
"NVRM: Attempting to close unopened minor device %u!\n",
@@ -1784,7 +1787,7 @@ static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp)
return;
}
if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
if (atomic64_dec_and_test(&nvl->usage_count))
nv_stop_device(nv, sp);
}
@@ -1815,7 +1818,7 @@ nvidia_close_callback(
nv_close_device(nv, sp);
bRemove = (!NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv)) &&
(NV_ATOMIC_READ(nvl->usage_count) == 0) &&
(atomic64_read(&nvl->usage_count) == 0) &&
rm_get_device_remove_flag(sp, nv->gpu_id);
for (i = 0; i < NV_FOPS_STACK_INDEX_COUNT; ++i)
@@ -1839,7 +1842,7 @@ nvidia_close_callback(
* any cleanup related to linux layer locks and nv linux state struct.
* nvidia_pci_remove when scheduled will do necessary cleanup.
*/
if ((NV_ATOMIC_READ(nvl->usage_count) == 0) && nv->removed)
if ((atomic64_read(&nvl->usage_count) == 0) && nv->removed)
{
nvidia_frontend_remove_device((void *)&nv_fops, nvl);
nv_lock_destroy_locks(sp, nv);
@@ -2141,22 +2144,34 @@ nvidia_ioctl(
NV_CTL_DEVICE_ONLY(nv);
if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 ||
arg_size % sizeof(NvU32) != 0)
if ((num_arg_gpus == 0) || (arg_size % sizeof(NvU32) != 0))
{
status = -EINVAL;
goto done;
}
/* atomically check and alloc attached_gpus */
down(&nvl->ldata_lock);
if (nvlfp->num_attached_gpus != 0)
{
up(&nvl->ldata_lock);
status = -EINVAL;
goto done;
}
NV_KMALLOC(nvlfp->attached_gpus, arg_size);
if (nvlfp->attached_gpus == NULL)
{
up(&nvl->ldata_lock);
status = -ENOMEM;
goto done;
}
memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
nvlfp->num_attached_gpus = num_arg_gpus;
up(&nvl->ldata_lock);
for (i = 0; i < nvlfp->num_attached_gpus; i++)
{
if (nvlfp->attached_gpus[i] == 0)
@@ -2171,9 +2186,15 @@ nvidia_ioctl(
if (nvlfp->attached_gpus[i] != 0)
nvidia_dev_put(nvlfp->attached_gpus[i], sp);
}
/* atomically free attached_gpus */
down(&nvl->ldata_lock);
NV_KFREE(nvlfp->attached_gpus, arg_size);
nvlfp->num_attached_gpus = 0;
up(&nvl->ldata_lock);
status = -EINVAL;
break;
}
@@ -2286,7 +2307,7 @@ nvidia_ioctl(
* Only the current client should have an open file
* descriptor for the device, to allow safe offlining.
*/
if (NV_ATOMIC_READ(nvl->usage_count) > 1)
if (atomic64_read(&nvl->usage_count) > 1)
{
status = -EBUSY;
goto unlock;
@@ -2664,12 +2685,12 @@ nvidia_ctl_open(
/* save the nv away in file->private_data */
nvlfp->nvptr = nvl;
if (NV_ATOMIC_READ(nvl->usage_count) == 0)
if (atomic64_read(&nvl->usage_count) == 0)
{
nv->flags |= (NV_FLAG_OPEN | NV_FLAG_CONTROL);
}
NV_ATOMIC_INC(nvl->usage_count);
atomic64_inc(&nvl->usage_count);
up(&nvl->ldata_lock);
return 0;
@@ -2695,7 +2716,7 @@ nvidia_ctl_close(
nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_close\n");
down(&nvl->ldata_lock);
if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
if (atomic64_dec_and_test(&nvl->usage_count))
{
nv->flags &= ~NV_FLAG_OPEN;
}
@@ -2864,7 +2885,7 @@ nv_alias_pages(
at->guest_id = guest_id;
*priv_data = at;
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
NV_PRINT_AT(NV_DBG_MEMINFO, at);
@@ -3439,7 +3460,7 @@ NV_STATUS NV_API_CALL nv_alloc_pages(
}
*priv_data = at;
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
NV_PRINT_AT(NV_DBG_MEMINFO, at);
@@ -3475,7 +3496,7 @@ NV_STATUS NV_API_CALL nv_free_pages(
* This is described in greater detail in the comments above the
* nvidia_vma_(open|release)() callbacks in nv-mmap.c.
*/
if (!NV_ATOMIC_DEC_AND_TEST(at->usage_count))
if (!atomic64_dec_and_test(&at->usage_count))
return NV_OK;
if (!at->flags.guest)
@@ -3503,7 +3524,7 @@ NvBool nv_lock_init_locks
NV_INIT_MUTEX(&nvl->ldata_lock);
NV_INIT_MUTEX(&nvl->mmap_lock);
NV_ATOMIC_SET(nvl->usage_count, 0);
atomic64_set(&nvl->usage_count, 0);
if (!rm_init_event_locks(sp, nv))
return NV_FALSE;

View File

@@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r539_41
#define NV_BUILD_BRANCH r539_62
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r539_41
#define NV_PUBLIC_BRANCH r539_62
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_41-927"
#define NV_BUILD_CHANGELIST_NUM (36124219)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_62-1069"
#define NV_BUILD_CHANGELIST_NUM (36903621)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_41-927"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36124219)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_62-1069"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36903621)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r539_41-1"
#define NV_BUILD_CHANGELIST_NUM (36117060)
#define NV_BUILD_BRANCH_VERSION "r539_62-1"
#define NV_BUILD_CHANGELIST_NUM (36902724)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "539.42"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36117060)
#define NV_BUILD_NAME "539.63"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36902724)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.261.03"
#define NV_VERSION_STRING "535.288.01"
#else

View File

@@ -1253,25 +1253,25 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInsta
{
NvU64 i = (NvU32)(pLogDecode->numLogBuffers);
NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(pLogDecode->sourceName, i * 2);
NVLOG_BUFFER_HANDLE handle = 0;
NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle);
if (status != NV_OK)
//
// Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case,
// we can have multiple buffers with exact same tag, only differentiable
// from gpuInstance
//
for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
{
return NV_FALSE;
}
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle];
if (pNvLogBuffer == NULL)
{
return NV_FALSE;
}
if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance &&
(pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
{
return NV_TRUE;
if (NvLogLogger.pBuffers[i] != NULL)
{
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i];
if ((pNvLogBuffer->tag == tag) &&
(DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) &&
FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
(pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
{
return NV_TRUE;
}
}
}
return NV_FALSE;
@@ -1279,19 +1279,27 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInsta
static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle)
{
NVLOG_BUFFER_HANDLE handle = 0;
NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle);
NvU64 i;
if (status != NV_OK)
return NV_FALSE;
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle];
if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance &&
(pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
//
// Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case,
// we can have multiple buffers with exact same tag, only differentiable
// from gpuInstance
//
for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
{
*pHandle = handle;
return NV_TRUE;
if (NvLogLogger.pBuffers[i] != NULL)
{
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i];
if ((pNvLogBuffer->tag == tag) &&
(DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) &&
FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
(pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
{
*pHandle = i;
return NV_TRUE;
}
}
}
return NV_FALSE;

View File

@@ -772,23 +772,23 @@ static inline NV_STATUS intrRestoreIntrRegValue(OBJGPU *pGpu, struct Intr *pIntr
#define intrRestoreIntrRegValue_HAL(pGpu, pIntr, arg0, arg1, arg2) intrRestoreIntrRegValue(pGpu, pIntr, arg0, arg1, arg2)
static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) {
static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) {
return NV_ERR_NOT_SUPPORTED;
}
NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid);
NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr);
#ifdef __nvoc_intr_h_disabled
static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) {
static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) {
NV_ASSERT_FAILED_PRECOMP("Intr was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_intr_h_disabled
#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid)
#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid, bRearmIntr)
#endif //__nvoc_intr_h_disabled
#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid)
#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr)
NV_STATUS intrTriggerPrivDoorbell_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid);

View File

@@ -76,6 +76,7 @@ struct THREAD_STATE_NODE
*/
NvU32 threadSeqId;
NvBool bValid;
NvBool bUsingHeap;
THREAD_TIMEOUT_STATE timeout;
NvU32 cpuNum;
NvU32 flags;
@@ -199,6 +200,7 @@ void threadStateFreeISRLockless(THREAD_STATE_NODE *, OBJGPU*, NvU32);
void threadStateInitISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32);
void threadStateFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32);
void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags);
THREAD_STATE_NODE* threadStateAlloc(NvU32 flags);
void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags);
NV_STATUS threadStateGetCurrent(THREAD_STATE_NODE **ppThreadNode, OBJGPU *pGpu);
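threadStateAlloc() gives callers that cannot place a THREAD_STATE_NODE on the stack a heap-backed equivalent. A minimal usage sketch, mirroring how dupMemory() adopts the new API later in this diff:

THREAD_STATE_NODE *pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE);
if (pThreadState == NULL)
    return NV_ERR_NO_MEMORY;

/* ... work that requires a current thread state ... */

/* threadStateFree() also releases the heap node (see bUsingHeap). */
threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);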

View File

@@ -601,6 +601,110 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
}
}
/**
*
* @brief Allocate a heap-based threadState
* @param[in] flags Thread state flags
*
* @return Heap-allocated THREAD_STATE_NODE* on success, NULL on failure
*/
THREAD_STATE_NODE* threadStateAlloc(NvU32 flags)
{
THREAD_STATE_NODE *pHeapNode;
NV_STATUS rmStatus;
NvU64 funcAddr;
// Isrs should be using threadStateIsrInit().
NV_ASSERT((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS |
THREAD_STATE_FLAGS_IS_ISR |
THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0);
// Check to see if ThreadState is enabled
if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED))
return NULL;
// Allocate heap node directly
pHeapNode = portMemAllocNonPaged(sizeof(THREAD_STATE_NODE));
if (pHeapNode == NULL)
return NULL;
portMemSet(pHeapNode, 0, sizeof(*pHeapNode));
pHeapNode->threadSeqId = portAtomicIncrementU32(&threadStateDatabase.threadSeqCntr);
pHeapNode->cpuNum = osGetCurrentProcessorNumber();
pHeapNode->bUsingHeap = NV_TRUE;
pHeapNode->flags = flags;
//
// The thread state free callbacks are only supported in the non-ISR paths
// as they invoke memory allocation routines.
//
listInit(&pHeapNode->cbList, portMemAllocatorGetGlobalNonPaged());
pHeapNode->flags |= THREAD_STATE_FLAGS_STATE_FREE_CB_ENABLED;
rmStatus = _threadNodeInitTime(pHeapNode);
if (rmStatus == NV_OK)
pHeapNode->flags |= THREAD_STATE_FLAGS_TIMEOUT_INITED;
rmStatus = osGetCurrentThread(&pHeapNode->threadId);
if (rmStatus != NV_OK)
goto cleanup_heap;
NV_ASSERT_OR_GOTO(pHeapNode->cpuNum < threadStateDatabase.maxCPUs, cleanup_heap);
funcAddr = (NvU64) (NV_RETURN_ADDRESS());
portSyncSpinlockAcquire(threadStateDatabase.spinlock);
if (!mapInsertExisting(&threadStateDatabase.dbRoot, (NvU64)pHeapNode->threadId, pHeapNode))
{
rmStatus = NV_ERR_OBJECT_NOT_FOUND;
// Place in the Preempted List if threadId is already present in the API list
if (mapInsertExisting(&threadStateDatabase.dbRootPreempted, (NvU64)pHeapNode->threadId, pHeapNode))
{
pHeapNode->flags |= THREAD_STATE_FLAGS_PLACED_ON_PREEMPT_LIST;
pHeapNode->bValid = NV_TRUE;
rmStatus = NV_OK;
}
else
{
// Reset the threadId as insertion failed on both maps. bValid is already NV_FALSE
pHeapNode->threadId = 0;
portSyncSpinlockRelease(threadStateDatabase.spinlock);
goto cleanup_heap;
}
}
else
{
pHeapNode->bValid = NV_TRUE;
rmStatus = NV_OK;
}
_threadStateLogInitCaller(pHeapNode, funcAddr);
portSyncSpinlockRelease(threadStateDatabase.spinlock);
_threadStatePrintInfo(pHeapNode);
NV_ASSERT(rmStatus == NV_OK);
threadPriorityStateAlloc();
if (TLS_MIRROR_THREADSTATE)
{
THREAD_STATE_NODE **pTls = (THREAD_STATE_NODE **)tlsEntryAcquire(TLS_ENTRY_ID_THREADSTATE);
NV_ASSERT_OR_GOTO(pTls != NULL, cleanup_heap);
if (*pTls != NULL)
{
NV_PRINTF(LEVEL_WARNING,
"TLS: Nested threadState inits detected. Previous threadState node is %p, new is %p\n",
*pTls, pHeapNode);
}
*pTls = pHeapNode;
}
return pHeapNode;
cleanup_heap:
portMemFree(pHeapNode);
return NULL;
}
/**
* @brief Initialize a threadState for locked ISR and Bottom-half
*
@@ -863,6 +967,12 @@ void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
r);
}
}
// Free heap memory if this node was heap-allocated
if (pThreadNode->bUsingHeap)
{
portMemFree(pThreadNode);
}
}
/**

View File

@@ -1163,6 +1163,8 @@ gsyncReadUniversalFrameCount_P2060
OBJTMR *pTmpTmr = NULL;
OBJTMR *pTmr = GPU_GET_TIMER(pGpu);
NV_CHECK_OR_RETURN(LEVEL_INFO, gsyncIsFrameLocked_P2060(pThis), NV_ERR_INVALID_STATE);
if (!(pThis->FrameCountData.iface == NV_P2060_MAX_IFACES_PER_GSYNC))
{
//
@@ -1207,7 +1209,8 @@ gsyncReadUniversalFrameCount_P2060
// P2060 refreshrate is in 0.00001 Hz, so divide by 10000 to get Hz.
// divide 1000000 by refreshRate to get the frame time in us.
//
pThis->FrameCountData.frameTime = 1000000 / (pThis->RefreshRate/10000); //in us
NV_CHECK_OR_RETURN(LEVEL_INFO, pThis->RefreshRate >= 10, NV_ERR_INVALID_STATE);
pThis->FrameCountData.frameTime = 1000*1000*1000 / (pThis->RefreshRate/10); //in us
//
// Enable FrameCountTimerService to verify FrameCountData.initialDifference.
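Taking the code's own scaling (RefreshRate/10^4 = Hz), the old and new expressions are algebraically identical:

\[
\mathrm{frameTime}_{\mu s}
  = \frac{10^{6}}{\mathrm{RefreshRate}/10^{4}}
  = \frac{10^{10}}{\mathrm{RefreshRate}}
  = \frac{10^{9}}{\mathrm{RefreshRate}/10}
\]

In integer arithmetic, however, the old form discarded four decimal digits of RefreshRate before dividing, while the new form discards only one; and the added NV_CHECK_OR_RETURN(pThis->RefreshRate >= 10) guarantees the inner divisor RefreshRate/10 is nonzero.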

View File

@@ -358,6 +358,7 @@ kgraphicsStateLoad_IMPL
NvU32 flags
)
{
if (pGpu->fecsCtxswLogConsumerCount > 0)
{
fecsBufferMap(pGpu, pKernelGraphics);

View File

@@ -231,6 +231,11 @@ memdescCreate
allocSize = Size;
if (allocSize == 0)
{
return NV_ERR_INVALID_ARGUMENT;
}
//
// this memdesc may have gotten forced to sysmem if no carveout,
// but for VPR it needs to be in vidmem, so check and re-direct here,
@@ -301,16 +306,7 @@ memdescCreate
// (4k >> 12 = 1). This modification helps us to avoid overflow of variable
// allocSize, in case caller of this function passes highest value of NvU64.
//
// If allocSize is passed as 0, PageCount should be returned as 0.
//
if (allocSize == 0)
{
PageCount = 0;
}
else
{
PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
}
PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
if (PhysicallyContiguous)
{
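Rejecting allocSize == 0 up front is what makes it safe to drop the PageCount special case above: the round-up is deliberately written to avoid overflow at the top of the NvU64 range, but that same form underflows for zero. A small standalone illustration (plain C, hypothetical values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* The form used in memdescCreate never adds before shifting, so it
     * is safe even for the largest NvU64 value ... */
    uint64_t size  = UINT64_MAX;
    uint64_t pages = ((size - 1) >> 12) + 1;   /* 2^52, no overflow */

    /* ... but for size == 0 it wraps: ((0 - 1) >> 12) + 1 == 2^52.
     * The naive (size + 4095) >> 12 has the opposite problem, wrapping
     * near UINT64_MAX. Hence the new early NV_ERR_INVALID_ARGUMENT. */
    printf("pages = %llu\n", (unsigned long long)pages);
    return 0;
}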

View File

@@ -58,6 +58,11 @@ NV_STATUS stdmemValidateParams
return NV_ERR_INVALID_ARGUMENT;
}
if (pAllocData->size == 0)
{
return NV_ERR_INVALID_ARGUMENT;
}
//
// These flags don't do anything in this path. No mapping on alloc and
// kernel map is controlled by TYPE

View File

@@ -382,9 +382,9 @@ NvU32 vgpuDevReadReg032(
OBJSYS *pSys = SYS_GET_INSTANCE();
OBJHYPERVISOR *pHypervisor = SYS_GET_HYPERVISOR(pSys);
if(!pGpu ||
!pHypervisor || !pHypervisor->bDetected || !pHypervisor->bIsHVMGuest ||
!GPU_GET_KERNEL_BIF(pGpu))
if (!pGpu || !GPU_GET_KERNEL_BIF(pGpu) ||
(!IS_VIRTUAL(pGpu) && !(pHypervisor && pHypervisor->bDetected && pHypervisor->bIsHVMGuest)))
{
*vgpuHandled = NV_FALSE;
return 0;

View File

@@ -3179,7 +3179,7 @@ cliresCtrlCmdNvdGetNvlogBufferInfo_IMPL
}
pBuffer = NvLogLogger.pBuffers[hBuffer];
NV_ASSERT_OR_RETURN(pBuffer != NULL, NV_ERR_OBJECT_NOT_FOUND);
NV_ASSERT_OR_ELSE(pBuffer != NULL, status = NV_ERR_OBJECT_NOT_FOUND; goto done);
NvBool bPause = pParams->flags & DRF_DEF(0000, _CTRL_NVD_NVLOG_BUFFER_INFO_FLAGS, _PAUSE, _YES);
nvlogPauseLoggingToBuffer(hBuffer, bPause);

View File

@@ -6289,7 +6289,7 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
{
NV_STATUS status = NV_OK;
nvGpuOpsLockSet acquiredLocks;
THREAD_STATE_NODE threadState;
THREAD_STATE_NODE *pThreadState;
NvHandle dupedMemHandle;
Memory *pMemory = NULL;
PMEMORY_DESCRIPTOR pMemDesc = NULL;
@@ -6310,14 +6310,15 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE));
threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE);
if (!pThreadState)
return NV_ERR_NO_MEMORY;
// RS-TODO use dual client locking
status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle,
&pSessionClient, &acquiredLocks);
if (status != NV_OK)
{
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
return status;
}
@@ -6359,15 +6360,23 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
}
// For SYSMEM or indirect peer mappings
bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu);
// Deviceless memory (NV01_MEMORY_DEVICELESS) can have a NULL pGpu. Perform targeted
// null checks before IOMMU operations that require valid GPU contexts.
bIsIndirectPeer = (pAdjustedMemDesc->pGpu != NULL) ?
gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu) : NV_FALSE;
if (bIsIndirectPeer ||
memdescIsSysmem(pAdjustedMemDesc))
{
if (NV_UNLIKELY(pAdjustedMemDesc->pGpu == NULL))
{
status = NV_ERR_INVALID_STATE;
goto freeGpaMemdesc;
}
// For sysmem allocations, the dup done below is very shallow and in
// particular doesn't create IOMMU mappings required for the mapped GPU
// to access the memory. That's a problem if the mapped GPU is different
// from the GPU that the allocation was created under. Add them
// explicitly here and remove them when the memory is freed in n
// explicitly here and remove them when the memory is freed in
// nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the
// mappings so it's ok to call it if the mappings are already there.
//
@@ -6436,7 +6445,7 @@ freeGpaMemdesc:
done:
_nvGpuOpsLocksRelease(&acquiredLocks);
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
return status;
}

View File

@@ -1,4 +1,4 @@
NVIDIA_VERSION = 535.261.03
NVIDIA_VERSION = 535.288.01
# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))