From 66ab8e8596b58673a18d294c1b1e165fe2d55dda Mon Sep 17 00:00:00 2001 From: Maneet Singh Date: Tue, 30 Sep 2025 12:40:20 -0700 Subject: [PATCH] 535.274.02 --- README.md | 8 +- kernel-open/Kbuild | 2 +- kernel-open/conftest.sh | 37 ++++++ kernel-open/nvidia-drm/nvidia-drm-drv.c | 16 ++- kernel-open/nvidia-drm/nvidia-drm-fb.c | 6 + kernel-open/nvidia-drm/nvidia-drm-fb.h | 3 + kernel-open/nvidia-drm/nvidia-drm-modeset.c | 7 ++ kernel-open/nvidia-drm/nvidia-drm.Kbuild | 1 + kernel-open/nvidia-uvm/uvm_va_block.c | 5 + kernel-open/nvidia/nv-frontend.c | 7 +- kernel-open/nvidia/nv.c | 67 +++++++---- src/common/inc/nvBldVer.h | 20 ++-- src/common/inc/nvUnixVersion.h | 2 +- .../uproc/os/libos-v3.1.0/lib/liblogdecode.c | 64 +++++----- src/nvidia/generated/g_intr_nvoc.h | 10 +- src/nvidia/inc/kernel/core/thread_state.h | 2 + src/nvidia/src/kernel/core/thread_state.c | 110 ++++++++++++++++++ src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c | 16 +-- src/nvidia/src/kernel/mem_mgr/standard_mem.c | 5 + src/nvidia/src/kernel/rmapi/client_resource.c | 2 +- src/nvidia/src/kernel/rmapi/nv_gpu_ops.c | 23 ++-- version.mk | 2 +- 22 files changed, 318 insertions(+), 97 deletions(-) diff --git a/README.md b/README.md index 0f72e1044..2df7e707f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 535.261.03. +version 535.274.02. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -535.261.03 driver release. This can be achieved by installing +535.274.02 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -180,7 +180,7 @@ software applications. ## Compatible GPUs The open-gpu-kernel-modules can be used on any Turing or later GPU -(see the table below). 
However, in the 535.261.03 release, +(see the table below). However, in the 535.274.02 release, GeForce and Workstation support is still considered alpha-quality. To enable use of the open kernel modules on GeForce and Workstation GPUs, @@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module parameter to 1. For more details, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/535.261.03/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/535.274.02/README/kernel_open.html In the below table, if three IDs are listed, the first is the PCI Device ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 7e2ca0a20..70ef87192 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc ccflags-y += -I$(src) ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args ccflags-y += -D__KERNEL__ -DMODULE -DNVRM -ccflags-y += -DNV_VERSION_STRING=\"535.261.03\" +ccflags-y += -DNV_VERSION_STRING=\"535.274.02\" ifneq ($(SYSSRCHOST1X),) ccflags-y += -I$(SYSSRCHOST1X) diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index 79baf1855..6136cb57a 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -4041,6 +4041,43 @@ compile_test() { fi ;; + drm_fb_create_takes_format_info) + # + # Determine if a `struct drm_format_info *` is passed into + # the .fb_create callback. If so, it will have 4 arguments. 
+ # This parameter was added in commit 81112eaac559 ("drm: + # Pass the format info to .fb_create") in linux-next + # (2025-07-16) + CODE=" + #include <drm/drm_mode_config.h> + #include <drm/drm_framebuffer.h> + + static const struct drm_mode_config_funcs funcs; + void conftest_drm_fb_create_takes_format_info(void) { + funcs.fb_create(NULL, NULL, NULL, NULL); + }" + + compile_check_conftest "$CODE" "NV_DRM_FB_CREATE_TAKES_FORMAT_INFO" "" "types" + ;; + + drm_fill_fb_struct_takes_format_info) + # + # Determine if a `struct drm_format_info *` is passed into + # drm_helper_mode_fill_fb_struct(). If so, it will have 4 arguments. + # This parameter was added in commit a34cc7bf1034 ("drm: + # Allow the caller to pass in the format info to + # drm_helper_mode_fill_fb_struct()") in linux-next + # (2025-07-16) + CODE=" + #include <drm/drm_modeset_helper.h> + + void conftest_drm_fill_fb_struct_takes_format_info(void) { + drm_helper_mode_fill_fb_struct(NULL, NULL, NULL, NULL); + }" + + compile_check_conftest "$CODE" "NV_DRM_FILL_FB_STRUCT_TAKES_FORMAT_INFO" "" "types" + ;; + drm_connector_funcs_have_mode_in_name) # # Determine if _mode_ is present in connector function names. 
We diff --git a/kernel-open/nvidia-drm/nvidia-drm-drv.c b/kernel-open/nvidia-drm/nvidia-drm-drv.c index 0f0f1286f..b1835c226 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-drv.c +++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c @@ -154,11 +154,14 @@ static void nv_drm_output_poll_changed(struct drm_device *dev) static struct drm_framebuffer *nv_drm_framebuffer_create( struct drm_device *dev, struct drm_file *file, - #if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + const struct drm_format_info *info, +#endif +#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) const struct drm_mode_fb_cmd2 *cmd - #else +#else struct drm_mode_fb_cmd2 *cmd - #endif +#endif ) { struct drm_mode_fb_cmd2 local_cmd; @@ -169,11 +172,14 @@ static struct drm_framebuffer *nv_drm_framebuffer_create( fb = nv_drm_internal_framebuffer_create( dev, file, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + info, +#endif &local_cmd); - #if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) +#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) *cmd = local_cmd; - #endif +#endif return fb; } diff --git a/kernel-open/nvidia-drm/nvidia-drm-fb.c b/kernel-open/nvidia-drm/nvidia-drm-fb.c index 2747123ff..c0c2ad1ba 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-fb.c +++ b/kernel-open/nvidia-drm/nvidia-drm-fb.c @@ -206,6 +206,9 @@ fail: struct drm_framebuffer *nv_drm_internal_framebuffer_create( struct drm_device *dev, struct drm_file *file, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + const struct drm_format_info *info, +#endif struct drm_mode_fb_cmd2 *cmd) { struct nv_drm_device *nv_dev = to_nv_device(dev); @@ -259,6 +262,9 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create( dev, #endif &nv_fb->base, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + info, +#endif cmd); /* diff --git a/kernel-open/nvidia-drm/nvidia-drm-fb.h b/kernel-open/nvidia-drm/nvidia-drm-fb.h index 
cf477cc73..c853559cf 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-fb.h +++ b/kernel-open/nvidia-drm/nvidia-drm-fb.h @@ -59,6 +59,9 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer( struct drm_framebuffer *nv_drm_internal_framebuffer_create( struct drm_device *dev, struct drm_file *file, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + const struct drm_format_info *info, +#endif struct drm_mode_fb_cmd2 *cmd); #endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */ diff --git a/kernel-open/nvidia-drm/nvidia-drm-modeset.c b/kernel-open/nvidia-drm/nvidia-drm-modeset.c index 4b601b085..67d077490 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-modeset.c +++ b/kernel-open/nvidia-drm/nvidia-drm-modeset.c @@ -451,6 +451,13 @@ int nv_drm_atomic_commit(struct drm_device *dev, #else drm_atomic_helper_swap_state(dev, state); #endif + /* + * Used to update legacy modeset state pointers to support UAPIs not updated + * by the core atomic modeset infrastructure. + * + * Example: /sys/class/drm//enabled + */ + drm_atomic_helper_update_legacy_modeset_state(dev, state); /* * nv_drm_atomic_commit_internal() must not return failure after diff --git a/kernel-open/nvidia-drm/nvidia-drm.Kbuild b/kernel-open/nvidia-drm/nvidia-drm.Kbuild index c9b6409b3..ccfaf97d7 100644 --- a/kernel-open/nvidia-drm/nvidia-drm.Kbuild +++ b/kernel-open/nvidia-drm/nvidia-drm.Kbuild @@ -139,3 +139,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg +NV_CONFTEST_TYPE_COMPILE_TESTS += drm_fb_create_takes_format_info diff --git a/kernel-open/nvidia-uvm/uvm_va_block.c b/kernel-open/nvidia-uvm/uvm_va_block.c index 9d7d1913c..23c1e8eb1 100644 --- a/kernel-open/nvidia-uvm/uvm_va_block.c +++ b/kernel-open/nvidia-uvm/uvm_va_block.c @@ -11519,6 +11519,11 @@ NV_STATUS 
uvm_va_block_evict_chunks(uvm_va_block_t *va_block, return NV_ERR_NO_MEMORY; } + if (uvm_va_block_is_hmm(va_block)) { + memset(block_context->hmm.src_pfns, 0, sizeof(block_context->hmm.src_pfns)); + memset(block_context->hmm.dst_pfns, 0, sizeof(block_context->hmm.dst_pfns)); + } + pages_to_evict = &block_context->caller_page_mask; uvm_page_mask_zero(pages_to_evict); chunk_region.outer = 0; diff --git a/kernel-open/nvidia/nv-frontend.c b/kernel-open/nvidia/nv-frontend.c index f5b871e6c..9515dd795 100644 --- a/kernel-open/nvidia/nv-frontend.c +++ b/kernel-open/nvidia/nv-frontend.c @@ -42,9 +42,12 @@ MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER); * DMA_BUF namespace is added by commit id 16b0314aa746 * ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in 5.16 */ +#if defined(NV_MODULE_IMPORT_NS_TAKES_CONSTANT) MODULE_IMPORT_NS(DMA_BUF); - -#endif +#else +MODULE_IMPORT_NS("DMA_BUF"); +#endif // defined(NV_MODULE_IMPORT_NS_TAKES_CONSTANT) +#endif // defined(MODULE_IMPORT_NS) static NvU32 nv_num_instances; diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index 2ec1e0801..a0b0bb2ae 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -1283,16 +1283,16 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp) { rc = os_alloc_mutex(&nvl->isr_bh_unlocked_mutex); if (rc != 0) - goto failed; + goto failed_release_irq; nv_kthread_q_item_init(&nvl->bottom_half_q_item, nvidia_isr_bh_unlocked, (void *)nv); rc = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name); if (rc != 0) - goto failed; + goto failed_release_irq; kthread_init = NV_TRUE; rc = nv_kthread_q_init(&nvl->queue.nvk, "nv_queue"); if (rc) - goto failed; + goto failed_release_irq; nv->queue = &nvl->queue; if (nv_platform_use_auto_online(nvl)) @@ -1300,33 +1300,18 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp) rc = nv_kthread_q_init(&nvl->remove_numa_memory_q, "nv_remove_numa_memory"); if (rc) - goto failed; + goto failed_release_irq; 
remove_numa_memory_kthread_init = NV_TRUE; } } if (!rm_init_adapter(sp, nv)) { - if (!(nv->flags & NV_FLAG_USES_MSIX) && - !(nv->flags & NV_FLAG_SOC_DISPLAY) && - !(nv->flags & NV_FLAG_SOC_IGPU)) - { - free_irq(nv->interrupt_line, (void *) nvl); - } - else if (nv->flags & NV_FLAG_SOC_DISPLAY) - { - } -#if defined(NV_LINUX_PCIE_MSI_SUPPORTED) - else - { - nv_free_msix_irq(nvl); - } -#endif NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "rm_init_adapter failed, device minor number %d\n", nvl->minor_num); rc = -EIO; - goto failed; + goto failed_release_irq; } { @@ -1360,6 +1345,26 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp) return 0; +failed_release_irq: + if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE)) + { + if (!(nv->flags & NV_FLAG_USES_MSIX) && + !(nv->flags & NV_FLAG_SOC_DISPLAY) && + !(nv->flags & NV_FLAG_SOC_IGPU)) + { + free_irq(nv->interrupt_line, (void *) nvl); + } + else if (nv->flags & NV_FLAG_SOC_DISPLAY) + { + } +#if defined(NV_LINUX_PCIE_MSI_SUPPORTED) + else + { + nv_free_msix_irq(nvl); + } +#endif + } + failed: #if defined(NV_LINUX_PCIE_MSI_SUPPORTED) if (nv->flags & NV_FLAG_USES_MSI) @@ -2141,22 +2146,34 @@ nvidia_ioctl( NV_CTL_DEVICE_ONLY(nv); - if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 || - arg_size % sizeof(NvU32) != 0) + if ((num_arg_gpus == 0) || (arg_size % sizeof(NvU32) != 0)) { status = -EINVAL; goto done; } + /* atomically check and alloc attached_gpus */ + down(&nvl->ldata_lock); + + if (nvlfp->num_attached_gpus != 0) + { + up(&nvl->ldata_lock); + status = -EINVAL; + goto done; + } + NV_KMALLOC(nvlfp->attached_gpus, arg_size); if (nvlfp->attached_gpus == NULL) { + up(&nvl->ldata_lock); status = -ENOMEM; goto done; } memcpy(nvlfp->attached_gpus, arg_copy, arg_size); nvlfp->num_attached_gpus = num_arg_gpus; + up(&nvl->ldata_lock); + for (i = 0; i < nvlfp->num_attached_gpus; i++) { if (nvlfp->attached_gpus[i] == 0) @@ -2171,9 +2188,15 @@ nvidia_ioctl( if (nvlfp->attached_gpus[i] != 0) 
nvidia_dev_put(nvlfp->attached_gpus[i], sp); } + + /* atomically free attached_gpus */ + down(&nvl->ldata_lock); + NV_KFREE(nvlfp->attached_gpus, arg_size); nvlfp->num_attached_gpus = 0; + up(&nvl->ldata_lock); + status = -EINVAL; break; } diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 7df377443..dee30270f 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r539_41 + #define NV_BUILD_BRANCH r539_56 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r539_41 + #define NV_PUBLIC_BRANCH r539_56 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_41-927" -#define NV_BUILD_CHANGELIST_NUM (36124219) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_56-994" +#define NV_BUILD_CHANGELIST_NUM (36497304) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_41-927" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36124219) +#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_56-994" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36497304) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r539_41-1" -#define NV_BUILD_CHANGELIST_NUM (36117060) +#define NV_BUILD_BRANCH_VERSION "r539_56-1" +#define NV_BUILD_CHANGELIST_NUM (36476729) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "539.42" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36117060) +#define NV_BUILD_NAME "539.57" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36476729) #define NV_BUILD_BRANCH_BASE_VERSION R535 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 2162ad3a6..befac152c 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) 
|| defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "535.261.03" +#define NV_VERSION_STRING "535.274.02" #else diff --git a/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c b/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c index 52535414c..6ab4d1da9 100644 --- a/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c +++ b/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c @@ -1253,25 +1253,25 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInsta { NvU64 i = (NvU32)(pLogDecode->numLogBuffers); NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(pLogDecode->sourceName, i * 2); - NVLOG_BUFFER_HANDLE handle = 0; - NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle); - if (status != NV_OK) + // + // Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case, + // we can have multiple buffers with exact same tag, only differentiable + // from gpuInstance + // + for (i = 0; i < NVLOG_MAX_BUFFERS; i++) { - return NV_FALSE; - } - - NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle]; - if (pNvLogBuffer == NULL) - { - return NV_FALSE; - } - - if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && - DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance && - (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) - { - return NV_TRUE; + if (NvLogLogger.pBuffers[i] != NULL) + { + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i]; + if ((pNvLogBuffer->tag == tag) && + (DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) && + FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && + (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + { + return NV_TRUE; + } + } } return NV_FALSE; @@ 
-1279,19 +1279,27 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInsta static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle) { - NVLOG_BUFFER_HANDLE handle = 0; - NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle); + NvU64 i; - if (status != NV_OK) - return NV_FALSE; - - NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle]; - if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && - DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance && - (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + // + // Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case, + // we can have multiple buffers with exact same tag, only differentiable + // from gpuInstance + // + for (i = 0; i < NVLOG_MAX_BUFFERS; i++) { - *pHandle = handle; - return NV_TRUE; + if (NvLogLogger.pBuffers[i] != NULL) + { + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i]; + if ((pNvLogBuffer->tag == tag) && + (DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) && + FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && + (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + { + *pHandle = i; + return NV_TRUE; + } + } } return NV_FALSE; diff --git a/src/nvidia/generated/g_intr_nvoc.h b/src/nvidia/generated/g_intr_nvoc.h index 6c49f4342..80679aacc 100644 --- a/src/nvidia/generated/g_intr_nvoc.h +++ b/src/nvidia/generated/g_intr_nvoc.h @@ -772,23 +772,23 @@ static inline NV_STATUS intrRestoreIntrRegValue(OBJGPU *pGpu, struct Intr *pIntr #define intrRestoreIntrRegValue_HAL(pGpu, pIntr, arg0, arg1, arg2) intrRestoreIntrRegValue(pGpu, pIntr, arg0, arg1, arg2) -static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) { +static inline NV_STATUS 
intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) { return NV_ERR_NOT_SUPPORTED; } -NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid); +NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr); #ifdef __nvoc_intr_h_disabled -static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) { +static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) { NV_ASSERT_FAILED_PRECOMP("Intr was disabled!"); return NV_ERR_NOT_SUPPORTED; } #else //__nvoc_intr_h_disabled -#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid) +#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid, bRearmIntr) #endif //__nvoc_intr_h_disabled -#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid) +#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr) NV_STATUS intrTriggerPrivDoorbell_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid); diff --git a/src/nvidia/inc/kernel/core/thread_state.h b/src/nvidia/inc/kernel/core/thread_state.h index 27d0880a9..4f28d277b 100644 --- a/src/nvidia/inc/kernel/core/thread_state.h +++ b/src/nvidia/inc/kernel/core/thread_state.h @@ -76,6 +76,7 @@ struct THREAD_STATE_NODE */ NvU32 threadSeqId; NvBool bValid; + NvBool bUsingHeap; THREAD_TIMEOUT_STATE timeout; NvU32 cpuNum; NvU32 flags; @@ -199,6 +200,7 @@ void threadStateFreeISRLockless(THREAD_STATE_NODE *, OBJGPU*, NvU32); void threadStateInitISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32); void threadStateFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32); void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 
flags); +THREAD_STATE_NODE* threadStateAlloc(NvU32 flags); void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags); NV_STATUS threadStateGetCurrent(THREAD_STATE_NODE **ppThreadNode, OBJGPU *pGpu); diff --git a/src/nvidia/src/kernel/core/thread_state.c b/src/nvidia/src/kernel/core/thread_state.c index fb7b37829..2250c4e46 100644 --- a/src/nvidia/src/kernel/core/thread_state.c +++ b/src/nvidia/src/kernel/core/thread_state.c @@ -601,6 +601,110 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) } } +/** + * + * @brief Allocate a heap-based threadState + * @param[in] flags Thread state flags + * + * @return Heap-allocated THREAD_STATE_NODE* on success, NULL on failure + */ +THREAD_STATE_NODE* threadStateAlloc(NvU32 flags) +{ + THREAD_STATE_NODE *pHeapNode; + NV_STATUS rmStatus; + NvU64 funcAddr; + + // Isrs should be using threadStateIsrInit(). + NV_ASSERT((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS | + THREAD_STATE_FLAGS_IS_ISR | + THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0); + + // Check to see if ThreadState is enabled + if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED)) + return NULL; + + // Allocate heap node directly + pHeapNode = portMemAllocNonPaged(sizeof(THREAD_STATE_NODE)); + if (pHeapNode == NULL) + return NULL; + + portMemSet(pHeapNode, 0, sizeof(*pHeapNode)); + pHeapNode->threadSeqId = portAtomicIncrementU32(&threadStateDatabase.threadSeqCntr); + pHeapNode->cpuNum = osGetCurrentProcessorNumber(); + pHeapNode->bUsingHeap = NV_TRUE; + pHeapNode->flags = flags; + + // + // The thread state free callbacks are only supported in the non-ISR paths + // as they invoke memory allocation routines. 
+ // + listInit(&pHeapNode->cbList, portMemAllocatorGetGlobalNonPaged()); + pHeapNode->flags |= THREAD_STATE_FLAGS_STATE_FREE_CB_ENABLED; + + rmStatus = _threadNodeInitTime(pHeapNode); + if (rmStatus == NV_OK) + pHeapNode->flags |= THREAD_STATE_FLAGS_TIMEOUT_INITED; + + rmStatus = osGetCurrentThread(&pHeapNode->threadId); + if (rmStatus != NV_OK) + goto cleanup_heap; + + NV_ASSERT_OR_GOTO(pHeapNode->cpuNum < threadStateDatabase.maxCPUs, cleanup_heap); + + funcAddr = (NvU64) (NV_RETURN_ADDRESS()); + + portSyncSpinlockAcquire(threadStateDatabase.spinlock); + if (!mapInsertExisting(&threadStateDatabase.dbRoot, (NvU64)pHeapNode->threadId, pHeapNode)) + { + rmStatus = NV_ERR_OBJECT_NOT_FOUND; + // Place in the Preempted List if threadId is already present in the API list + if (mapInsertExisting(&threadStateDatabase.dbRootPreempted, (NvU64)pHeapNode->threadId, pHeapNode)) + { + pHeapNode->flags |= THREAD_STATE_FLAGS_PLACED_ON_PREEMPT_LIST; + pHeapNode->bValid = NV_TRUE; + rmStatus = NV_OK; + } + else + { + // Reset the threadId as insertion failed on both maps. bValid is already NV_FALSE + pHeapNode->threadId = 0; + portSyncSpinlockRelease(threadStateDatabase.spinlock); + goto cleanup_heap; + } + } + else + { + pHeapNode->bValid = NV_TRUE; + rmStatus = NV_OK; + } + + _threadStateLogInitCaller(pHeapNode, funcAddr); + + portSyncSpinlockRelease(threadStateDatabase.spinlock); + + _threadStatePrintInfo(pHeapNode); + + NV_ASSERT(rmStatus == NV_OK); + threadPriorityStateAlloc(); + + if (TLS_MIRROR_THREADSTATE) + { + THREAD_STATE_NODE **pTls = (THREAD_STATE_NODE **)tlsEntryAcquire(TLS_ENTRY_ID_THREADSTATE); + NV_ASSERT_OR_GOTO(pTls != NULL, cleanup_heap); + if (*pTls != NULL) + { + NV_PRINTF(LEVEL_WARNING, + "TLS: Nested threadState inits detected. 
Previous threadState node is %p, new is %p\n", + *pTls, pHeapNode); + } + *pTls = pHeapNode; + } + return pHeapNode; + +cleanup_heap: + portMemFree(pHeapNode); + return NULL; +} /** * @brief Initialize a threadState for locked ISR and Bottom-half * @@ -863,6 +967,12 @@ void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags) r); } } + + // Free heap memory if this node was heap-allocated + if (pThreadNode->bUsingHeap) + { + portMemFree(pThreadNode); + } } /** diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c index 5b29ac2c9..92cfc9c5b 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c @@ -231,6 +231,11 @@ memdescCreate allocSize = Size; + if (allocSize == 0) + { + return NV_ERR_INVALID_ARGUMENT; + } + // // this memdesc may have gotten forced to sysmem if no carveout, // but for VPR it needs to be in vidmem, so check and re-direct here, @@ -301,16 +306,7 @@ memdescCreate // (4k >> 12 = 1). This modification helps us to avoid overflow of variable // allocSize, in case caller of this function passes highest value of NvU64. // - // If allocSize is passed as 0, PageCount should be returned as 0. - // - if (allocSize == 0) - { - PageCount = 0; - } - else - { - PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1; - } + PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1; if (PhysicallyContiguous) { diff --git a/src/nvidia/src/kernel/mem_mgr/standard_mem.c b/src/nvidia/src/kernel/mem_mgr/standard_mem.c index cca4241c9..ee19180be 100644 --- a/src/nvidia/src/kernel/mem_mgr/standard_mem.c +++ b/src/nvidia/src/kernel/mem_mgr/standard_mem.c @@ -58,6 +58,11 @@ NV_STATUS stdmemValidateParams return NV_ERR_INVALID_ARGUMENT; } + if (pAllocData->size == 0) + { + return NV_ERR_INVALID_ARGUMENT; + } + // // These flags don't do anything in this path. 
No mapping on alloc and // kernel map is controlled by TYPE diff --git a/src/nvidia/src/kernel/rmapi/client_resource.c b/src/nvidia/src/kernel/rmapi/client_resource.c index 580ecae3b..35dbe1634 100644 --- a/src/nvidia/src/kernel/rmapi/client_resource.c +++ b/src/nvidia/src/kernel/rmapi/client_resource.c @@ -3179,7 +3179,7 @@ cliresCtrlCmdNvdGetNvlogBufferInfo_IMPL } pBuffer = NvLogLogger.pBuffers[hBuffer]; - NV_ASSERT_OR_RETURN(pBuffer != NULL, NV_ERR_OBJECT_NOT_FOUND); + NV_ASSERT_OR_ELSE(pBuffer != NULL, status = NV_ERR_OBJECT_NOT_FOUND; goto done); NvBool bPause = pParams->flags & DRF_DEF(0000, _CTRL_NVD_NVLOG_BUFFER_INFO_FLAGS, _PAUSE, _YES); nvlogPauseLoggingToBuffer(hBuffer, bPause); diff --git a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c index 092e734a5..e277581b5 100644 --- a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c +++ b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c @@ -6289,7 +6289,7 @@ static NV_STATUS dupMemory(struct gpuDevice *device, { NV_STATUS status = NV_OK; nvGpuOpsLockSet acquiredLocks; - THREAD_STATE_NODE threadState; + THREAD_STATE_NODE *pThreadState; NvHandle dupedMemHandle; Memory *pMemory = NULL; PMEMORY_DESCRIPTOR pMemDesc = NULL; @@ -6310,14 +6310,15 @@ static NV_STATUS dupMemory(struct gpuDevice *device, NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE)); - threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); - + pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE); + if (!pThreadState) + return NV_ERR_NO_MEMORY; // RS-TODO use dual client locking status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle, &pSessionClient, &acquiredLocks); if (status != NV_OK) { - threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); + threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE); return status; } @@ -6359,15 +6360,23 @@ static NV_STATUS dupMemory(struct gpuDevice *device, } // For SYSMEM or indirect peer mappings - 
bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu); + // Deviceless memory (NV01_MEMORY_DEVICELESS) can have a NULL pGpu. Perform targeted + // null checks before IOMMU operations that require valid GPU contexts. + bIsIndirectPeer = (pAdjustedMemDesc->pGpu != NULL) ? + gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu) : NV_FALSE; if (bIsIndirectPeer || memdescIsSysmem(pAdjustedMemDesc)) { + if (NV_UNLIKELY(pAdjustedMemDesc->pGpu == NULL)) + { + status = NV_ERR_INVALID_STATE; + goto freeGpaMemdesc; + } // For sysmem allocations, the dup done below is very shallow and in // particular doesn't create IOMMU mappings required for the mapped GPU // to access the memory. That's a problem if the mapped GPU is different // from the GPU that the allocation was created under. Add them - // explicitly here and remove them when the memory is freed in n + // explicitly here and remove them when the memory is freed in // nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the // mappings so it's ok to call it if the mappings are already there. // @@ -6436,7 +6445,7 @@ freeGpaMemdesc: done: _nvGpuOpsLocksRelease(&acquiredLocks); - threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); + threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE); return status; } diff --git a/version.mk b/version.mk index d763f1763..ad62e8636 100644 --- a/version.mk +++ b/version.mk @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 535.261.03 +NVIDIA_VERSION = 535.274.02 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))