From c700e8f91cc6dea37d934f2bd43e8de8ee28da06 Mon Sep 17 00:00:00 2001 From: Andy Ritger Date: Tue, 22 Nov 2022 10:04:21 -0800 Subject: [PATCH] 515.86.01 --- CHANGELOG.md | 4 + README.md | 22 ++++- kernel-open/Kbuild | 2 +- kernel-open/common/inc/nv.h | 8 +- kernel-open/conftest.sh | 27 +++++- .../nvidia-drm/nvidia-drm-gem-nvkms-memory.c | 6 +- .../nvidia-drm/nvidia-drm-gem-user-memory.c | 6 +- kernel-open/nvidia-drm/nvidia-drm-linux.c | 10 +++ .../nvidia-modeset/nvidia-modeset-linux.c | 8 ++ .../nvidia-modeset/nvidia-modeset.Kbuild | 1 + kernel-open/nvidia/nv-dmabuf.c | 82 +++++++++++++++---- kernel-open/nvidia/nv-mmap.c | 40 ++++++--- kernel-open/nvidia/nv.c | 4 +- kernel-open/nvidia/nvlink_linux.c | 4 +- src/common/inc/nvBldVer.h | 20 ++--- src/common/inc/nvUnixVersion.h | 2 +- .../nvidia/inc/ctrl/ctrl2080/ctrl2080perf.h | 2 +- src/nvidia-modeset/src/nvkms-evo3.c | 72 ++++++++-------- src/nvidia/arch/nvalloc/common/inc/nvcst.h | 1 + src/nvidia/arch/nvalloc/unix/include/nv.h | 4 + src/nvidia/arch/nvalloc/unix/src/osapi.c | 22 ++++- src/nvidia/generated/g_nv_name_released.h | 14 ++++ src/nvidia/kernel/inc/vgpu/sdk-structures.h | 1 + .../src/kernel/diagnostics/nv_debug_dump.c | 34 +++++++- .../src/kernel/gpu/bus/kern_bus_vbar2.c | 5 ++ .../src/kernel/gpu/fifo/kernel_channel.c | 8 +- .../gpu/gr/kernel_sm_debugger_session_ctrl.c | 18 +++- src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c | 9 +- src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c | 5 +- .../gpu/mmu/arch/ampere/kern_gmmu_ga100.c | 8 +- .../nvlink/arch/ampere/kernel_nvlink_ga100.c | 9 +- src/nvidia/src/kernel/gpu/timer/timer.c | 4 +- src/nvidia/src/kernel/rmapi/mapping_cpu.c | 12 ++- version.mk | 2 +- 34 files changed, 368 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2949b51e..cb67efe92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Release 515 Entries +### [515.86.01] 2022-11-22 + ### [515.76] 2022-09-20 #### Fixed @@ -9,6 +11,8 @@ - Improved compatibility with 
new Linux kernel releases - Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates +### [515.65.07] 2022-10-19 + ### [515.65.01] 2022-08-02 #### Fixed diff --git a/README.md b/README.md index e40cd4c41..5de0da1ed 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 515.76. +version 515.86.01. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with gsp.bin firmware and user-space NVIDIA GPU driver components from a corresponding -515.76 driver release. This can be achieved by installing +515.86.01 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -167,7 +167,7 @@ for the target kernel. ## Compatible GPUs The open-gpu-kernel-modules can be used on any Turing or later GPU -(see the table below). However, in the 515.76 release, +(see the table below). However, in the 515.86.01 release, GeForce and Workstation support is still considered alpha-quality. To enable use of the open kernel modules on GeForce and Workstation GPUs, @@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module parameter to 1. For more details, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/515.76/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/515.86.01/README/kernel_open.html In the below table, if three IDs are listed, the first is the PCI Device ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI @@ -652,6 +652,17 @@ Subsystem Device ID. 
| NVIDIA PG506-232 | 20B6 10DE 1492 | | NVIDIA A30 | 20B7 10DE 1532 | | NVIDIA A100-PCIE-40GB | 20F1 10DE 145F | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 | +| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 | +| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 | +| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A | +| NVIDIA A800 40GB PCIe | 20F6 10DE 17A3 | | NVIDIA GeForce GTX 1660 Ti | 2182 | | NVIDIA GeForce GTX 1660 | 2184 | | NVIDIA GeForce GTX 1650 SUPER | 2187 | @@ -685,6 +696,7 @@ Subsystem Device ID. | NVIDIA GeForce RTX 3090 Ti | 2203 | | NVIDIA GeForce RTX 3090 | 2204 | | NVIDIA GeForce RTX 3080 | 2206 | +| NVIDIA GeForce RTX 3070 Ti | 2207 | | NVIDIA GeForce RTX 3080 Ti | 2208 | | NVIDIA GeForce RTX 3080 | 220A | | NVIDIA CMP 90HX | 220D | @@ -736,6 +748,7 @@ Subsystem Device ID. | NVIDIA RTX A3000 12GB Laptop GPU | 24B9 | | NVIDIA RTX A4500 Laptop GPU | 24BA | | NVIDIA RTX A3000 12GB Laptop GPU | 24BB | +| NVIDIA GeForce RTX 3060 Ti | 24C9 | | NVIDIA GeForce RTX 3080 Laptop GPU | 24DC | | NVIDIA GeForce RTX 3070 Laptop GPU | 24DD | | NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 | @@ -751,6 +764,7 @@ Subsystem Device ID. 
| NVIDIA RTX A2000 | 2531 103C 151D | | NVIDIA RTX A2000 | 2531 10DE 151D | | NVIDIA RTX A2000 | 2531 17AA 151D | +| NVIDIA GeForce RTX 3060 | 2544 | | NVIDIA GeForce RTX 3060 Laptop GPU | 2560 | | NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 | | NVIDIA RTX A2000 12GB | 2571 1028 1611 | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 42c14d686..17f49f349 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc EXTRA_CFLAGS += -I$(src) EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.76\" +EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.86.01\" EXTRA_CFLAGS += -Wno-unused-function diff --git a/kernel-open/common/inc/nv.h b/kernel-open/common/inc/nv.h index 568dfdf13..6f8b2cfbd 100644 --- a/kernel-open/common/inc/nv.h +++ b/kernel-open/common/inc/nv.h @@ -628,7 +628,7 @@ static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) { return ((offset >= nv->regs->cpu_address) && - + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1)))); } @@ -637,7 +637,7 @@ static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) { return ((nv->fb) && (offset >= nv->fb->cpu_address) && - + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1)))); } @@ -647,7 +647,7 @@ static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) && (offset >= nv->ud.cpu_address) && - + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1)))); } @@ -658,7 +658,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && (offset >= 
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && - + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + (nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))); diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index 5ec66bf37..87e78fe94 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -5268,7 +5268,7 @@ compile_test() { # Determine if 'num_registered_fb' variable is present. # # 'num_registered_fb' was removed by commit 5727dcfd8486 - # ("fbdev: Make registered_fb[] private to fbmem.c) for + # ("fbdev: Make registered_fb[] private to fbmem.c") for # v5.20 linux-next (2022-07-27). # CODE=" @@ -5280,6 +5280,31 @@ compile_test() { compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types" ;; + acpi_video_backlight_use_native) + # + # Determine if acpi_video_backlight_use_native() function is present + # + # acpi_video_backlight_use_native was added by commit 2600bfa3df99 + # (ACPI: video: Add acpi_video_backlight_use_native() helper) for + # v6.0 (2022-08-17). Note: the include directive for <linux/types.h> + # in this conftest is necessary in order to support kernels between + # commit 0b9f7d93ca61 ("ACPI / i915: ignore firmware requests for + # backlight change") for v3.16 (2014-07-07) and commit 3bd6bce369f5 + # ("ACPI / video: Port to new backlight interface selection API") + # for v4.2 (2015-07-16). Kernels within this range use the 'bool' + # type and the related 'false' value in <acpi/video.h> without first + # including the definitions of that type and value. + # + CODE=" + #include <linux/types.h> + #include <acpi/video.h> + void conftest_acpi_video_backglight_use_native(void) { + acpi_video_backlight_use_native(0); + }" + + compile_check_conftest "$CODE" "NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE" "" "functions" + ;; + # When adding a new conftest entry, please use the correct format for # specifying the relevant upstream Linux kernel commit. 
# diff --git a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c index 1d047f2b1..8d549ec84 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c +++ b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c @@ -95,7 +95,11 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault( pfn >>= PAGE_SHIFT; pfn += page_offset; } else { - BUG_ON(page_offset > nv_nvkms_memory->pages_count); + + BUG_ON(page_offset >= nv_nvkms_memory->pages_count); + + + pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]); } diff --git a/kernel-open/nvidia-drm/nvidia-drm-gem-user-memory.c b/kernel-open/nvidia-drm/nvidia-drm-gem-user-memory.c index e554adc27..8824daab3 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-gem-user-memory.c +++ b/kernel-open/nvidia-drm/nvidia-drm-gem-user-memory.c @@ -112,7 +112,11 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault( page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node); - BUG_ON(page_offset > nv_user_memory->pages_count); + + BUG_ON(page_offset >= nv_user_memory->pages_count); + + + ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]); switch (ret) { diff --git a/kernel-open/nvidia-drm/nvidia-drm-linux.c b/kernel-open/nvidia-drm/nvidia-drm-linux.c index 6bdf40a44..ffd3387e9 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-linux.c +++ b/kernel-open/nvidia-drm/nvidia-drm-linux.c @@ -47,6 +47,16 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400); void *nv_drm_calloc(size_t nmemb, size_t size) { + + size_t total_size = nmemb * size; + // + // Check for overflow. 
+ // + if ((nmemb != 0) && ((total_size / nmemb) != size)) + { + return NULL; + } + return kzalloc(nmemb * size, GFP_KERNEL); } diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c index 383af3de7..6f9771bc0 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c +++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c @@ -35,6 +35,8 @@ #include #include +#include + #include "nvstatus.h" #include "nv-register-module.h" @@ -1060,6 +1062,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv, struct nvkms_backlight_device *nvkms_bd = NULL; int i; +#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE) + if (!acpi_video_backlight_use_native()) { + return NULL; + } +#endif + gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE); if (gpu_info == NULL) { return NULL; diff --git a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild index 0475f26cf..0d927978d 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild +++ b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild @@ -96,4 +96,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64 +NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node diff --git a/kernel-open/nvidia/nv-dmabuf.c b/kernel-open/nvidia/nv-dmabuf.c index 84d3146a4..bbfcf7d42 100644 --- a/kernel-open/nvidia/nv-dmabuf.c +++ b/kernel-open/nvidia/nv-dmabuf.c @@ -26,6 +26,15 @@ #if defined(CONFIG_DMA_SHARED_BUFFER) + +// +// The Linux kernel's dma_length in struct scatterlist is unsigned int +// which limits the maximum sg length to 4GB - 1. 
+// To get around this limitation, the BAR1 scatterlist returned by RM +// is split into (4GB - PAGE_SIZE) sized chunks to build the sg_table. +// +#define NV_DMA_BUF_SG_MAX_LEN ((NvU32)(NVBIT64(32) - PAGE_SIZE)) + typedef struct nv_dma_buf_mem_handle { NvHandle h_memory; @@ -259,26 +268,36 @@ nv_dma_buf_unmap_unlocked( nv_dma_device_t *peer_dma_dev, nv_dma_buf_file_private_t *priv, struct sg_table *sgt, - NvU32 count + NvU32 mapped_handle_count ) { NV_STATUS status; NvU32 i; NvU64 dma_len; NvU64 dma_addr; - NvU64 bar1_va; NvBool bar1_unmap_needed; struct scatterlist *sg = NULL; bar1_unmap_needed = (priv->bar1_va_ref_count == 0); - for_each_sg(sgt->sgl, sg, count, i) + sg = sgt->sgl; + for (i = 0; i < mapped_handle_count; i++) { - dma_addr = sg_dma_address(sg); - dma_len = priv->handles[i].size; - bar1_va = priv->handles[i].bar1_va; + NvU64 handle_size = priv->handles[i].size; - WARN_ON(sg_dma_len(sg) != priv->handles[i].size); + dma_addr = sg_dma_address(sg); + dma_len = 0; + + // + // Seek ahead in the scatterlist until the handle size is covered. + // IOVA unmap can then be done all at once instead of doing it + // one sg at a time. + // + while(handle_size != dma_len) + { + dma_len += sg_dma_len(sg); + sg = sg_next(sg); + } nv_dma_unmap_peer(peer_dma_dev, (dma_len / os_page_size), dma_addr); @@ -309,7 +328,8 @@ nv_dma_buf_map( nv_dma_device_t peer_dma_dev = {{ 0 }}; NvBool bar1_map_needed; NvBool bar1_unmap_needed; - NvU32 count = 0; + NvU32 mapped_handle_count = 0; + NvU32 num_sg_entries = 0; NvU32 i = 0; int rc = 0; @@ -361,13 +381,23 @@ nv_dma_buf_map( } memset(sgt, 0, sizeof(struct sg_table)); + // + // Pre-calculate number of sg entries we need based on handle size. + // This is needed to allocate sg_table. 
+ // + for (i = 0; i < priv->num_objects; i++) + { + NvU64 count = priv->handles[i].size + NV_DMA_BUF_SG_MAX_LEN - 1; + do_div(count, NV_DMA_BUF_SG_MAX_LEN); + num_sg_entries += count; + } // // RM currently returns contiguous BAR1, so we create as many - // sg entries as the number of handles being mapped. + // sg entries as num_sg_entries calculated above. // When RM can alloc discontiguous BAR1, this code will need to be revisited. // - rc = sg_alloc_table(sgt, priv->num_objects, GFP_KERNEL); + rc = sg_alloc_table(sgt, num_sg_entries, GFP_KERNEL); if (rc != 0) { goto free_sgt; @@ -377,7 +407,8 @@ nv_dma_buf_map( peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask; bar1_map_needed = bar1_unmap_needed = (priv->bar1_va_ref_count == 0); - for_each_sg(sgt->sgl, sg, priv->num_objects, i) + sg = sgt->sgl; + for (i = 0; i < priv->num_objects; i++) { NvU64 dma_addr; NvU64 dma_len; @@ -395,9 +426,15 @@ nv_dma_buf_map( } } + mapped_handle_count++; + dma_addr = priv->handles[i].bar1_va; dma_len = priv->handles[i].size; + // + // IOVA map the full handle at once and then breakdown the range + // (dma_addr, dma_addr + dma_len) into smaller sg entries. 
+ // status = nv_dma_map_peer(&peer_dma_dev, priv->nv->dma_dev, 0x1, (dma_len / os_page_size), &dma_addr); if (status != NV_OK) @@ -411,14 +448,23 @@ nv_dma_buf_map( priv->handles[i].bar1_va); } + mapped_handle_count--; + // Unmap remaining memory handles goto unmap_handles; } - sg_set_page(sg, NULL, dma_len, 0); - sg_dma_address(sg) = (dma_addr_t)dma_addr; - sg_dma_len(sg) = dma_len; - count++; + while(dma_len != 0) + { + NvU32 sg_len = NV_MIN(dma_len, NV_DMA_BUF_SG_MAX_LEN); + + sg_set_page(sg, NULL, sg_len, 0); + sg_dma_address(sg) = (dma_addr_t)dma_addr; + sg_dma_len(sg) = sg_len; + dma_addr += sg_len; + dma_len -= sg_len; + sg = sg_next(sg); + } } priv->bar1_va_ref_count++; @@ -434,7 +480,7 @@ nv_dma_buf_map( return sgt; unmap_handles: - nv_dma_buf_unmap_unlocked(sp, &peer_dma_dev, priv, sgt, count); + nv_dma_buf_unmap_unlocked(sp, &peer_dma_dev, priv, sgt, mapped_handle_count); sg_free_table(sgt); @@ -821,12 +867,12 @@ nv_dma_buf_reuse( } + if ((priv->total_objects < params->numObjects) || + (params->index > (priv->total_objects - params->numObjects))) - if (params->index > (priv->total_objects - params->numObjects)) { - status = NV_ERR_INVALID_ARGUMENT; goto unlock_priv; } diff --git a/kernel-open/nvidia/nv-mmap.c b/kernel-open/nvidia/nv-mmap.c index b62719cda..85f6fae71 100644 --- a/kernel-open/nvidia/nv-mmap.c +++ b/kernel-open/nvidia/nv-mmap.c @@ -133,10 +133,10 @@ nvidia_vma_access( pageOffset = (addr & ~PAGE_MASK); - - - - + if (length < 0) + { + return -EINVAL; + } if (!mmap_context->valid) @@ -217,8 +217,19 @@ static vm_fault_t nvidia_fault( NvU64 page; NvU64 num_pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT; - NvU64 pfn_start = - (nvlfp->mmap_context.mmap_start >> PAGE_SHIFT) + vma->vm_pgoff; + + NvU64 pfn_start = (nvlfp->mmap_context.mmap_start >> PAGE_SHIFT); + + + + + + + if (vma->vm_pgoff != 0) + { + return VM_FAULT_SIGBUS; + } + // Mapping revocation is only supported for GPU mappings. 
if (NV_IS_CTL_DEVICE(nv)) @@ -490,6 +501,13 @@ int nvidia_mmap_helper( return -EINVAL; } + + if (vma->vm_pgoff != 0) + { + return -EINVAL; + } + + NV_PRINT_VMA(NV_DBG_MEMINFO, vma); status = nv_check_gpu_state(nv); @@ -517,11 +535,11 @@ int nvidia_mmap_helper( NvU64 access_len = mmap_context->access_size; - - - - - + // validate the size + if (NV_VMA_SIZE(vma) != mmap_length) + { + return -ENXIO; + } if (IS_REG_OFFSET(nv, access_start, access_len)) { diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index b8ce6d5a9..100b956c5 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -1468,8 +1468,8 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp) } - - + if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX)) + return -EMFILE; if ( ! (nv->flags & NV_FLAG_OPEN)) diff --git a/kernel-open/nvidia/nvlink_linux.c b/kernel-open/nvidia/nvlink_linux.c index af8a048d0..e8fd25f2e 100644 --- a/kernel-open/nvidia/nvlink_linux.c +++ b/kernel-open/nvidia/nvlink_linux.c @@ -208,8 +208,8 @@ static int nvlink_fops_release(struct inode *inode, struct file *filp) nvlink_print(NVLINK_DBG_INFO, "nvlink driver close\n"); - - + if (private == NULL) + return -ENOMEM; mutex_lock(&nvlink_drvctx.lock); diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 3db404ab5..534826937 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. 
You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r515_00 + #define NV_BUILD_BRANCH r517_71 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r515_00 + #define NV_PUBLIC_BRANCH r517_71 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r515/r515_00-409" -#define NV_BUILD_CHANGELIST_NUM (31799928) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r515/r517_71-480" +#define NV_BUILD_CHANGELIST_NUM (31976733) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r515/r515_00-409" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31799928) +#define NV_BUILD_NAME "rel/gpu_drv/r515/r517_71-480" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31976733) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r515_00-323" -#define NV_BUILD_CHANGELIST_NUM (31799928) +#define NV_BUILD_BRANCH_VERSION "r517_71-1" +#define NV_BUILD_CHANGELIST_NUM (31976733) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "517.40" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31799928) +#define NV_BUILD_NAME "517.72" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31976733) #define NV_BUILD_BRANCH_BASE_VERSION R515 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index d4eba52cc..faa520fbb 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "515.76" +#define NV_VERSION_STRING "515.86.01" #else diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080perf.h b/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080perf.h index 0c5d77957..8a68b31a7 100644 --- 
a/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080perf.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080perf.h @@ -379,7 +379,7 @@ typedef NV2080_CTRL_GPUMON_SAMPLES NV2080_CTRL_PERF_GET_GPUMON_PERFMON_UTIL_SAMP /*! * Number of GPU monitoring sample in their respective buffers. */ -#define NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL 100 +#define NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL 72 #define NV2080_CTRL_PERF_GPUMON_PERFMON_UTIL_BUFFER_SIZE \ NV_SIZEOF32(NV2080_CTRL_PERF_GPUMON_PERFMON_UTIL_SAMPLE) * \ diff --git a/src/nvidia-modeset/src/nvkms-evo3.c b/src/nvidia-modeset/src/nvkms-evo3.c index 6cb390ef8..68a969504 100644 --- a/src/nvidia-modeset/src/nvkms-evo3.c +++ b/src/nvidia-modeset/src/nvkms-evo3.c @@ -5199,11 +5199,13 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head, const NVHwModeViewPortEvo *pViewPortMin, const NVHwModeViewPortEvo *pViewPort, const NVHwModeViewPortEvo *pViewPortMax, - NVEvoUpdateState *updateState) + NVEvoUpdateState *updateState, + NvU32 setWindowUsageBounds) { const NVEvoCapabilitiesPtr pEvoCaps = &pDevEvo->gpus[0].capabilities; NVEvoChannelPtr pChannel = pDevEvo->core; struct NvKmsScalingUsageBounds scalingUsageBounds = { }; + NvU32 win; /* These methods should only apply to a single pDpy */ nvAssert(pDevEvo->subDevMaskStackDepth > 0); @@ -5249,6 +5251,35 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head, DRF_NUM(C37D, _HEAD_SET_MAX_OUTPUT_SCALE_FACTOR, _VERTICAL, scalingUsageBounds.maxVDownscaleFactor)); + /* + * Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds + * for each window that is attached to the head. + * + * Precomp will clip the post-scaled window to the input viewport, reverse-scale + * this cropped size back to the input surface domain, and isohub will fetch + * this cropped size. This function assumes that there's no window scaling yet, + * so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport + * width. 
SetScalingUsageBoundsOneWindow5() will take care of updating + * MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later. + * + * Program MAX_PIXELS_FETCHED_PER_LINE for each window that is attached to + * head. For Turing+, SetScalingUsageBoundsOneWindow5() will take care of + * programming window usage bounds only for the layers/windows in use. + */ + setWindowUsageBounds |= + DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE, + GetMaxPixelsFetchedPerLine(pViewPort->in.width, + NV_EVO_SCALE_FACTOR_1X)); + + for (win = 0; win < pDevEvo->numWindows; win++) { + if (head != pDevEvo->headForWindow[win]) { + continue; + } + + nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1); + nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds); + } + return scalingUsageBounds.vUpscalingAllowed; } @@ -5259,11 +5290,10 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head, NVEvoUpdateState *updateState) { NVEvoChannelPtr pChannel = pDevEvo->core; - NvU32 win; - NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3; NvBool verticalUpscalingAllowed = EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort, - pViewPortMax, updateState); + pViewPortMax, updateState, + NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3); nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1); @@ -5273,34 +5303,6 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head, (verticalUpscalingAllowed ? DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) : DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE))); - /* - * Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds - * for each window that is attached to the head. - * - * Precomp will clip the post-scaled window to the input viewport, reverse-scale - * this cropped size back to the input surface domain, and isohub will fetch - * this cropped size. 
This function assumes that there's no window scaling yet, - * so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport - * width. SetScalingUsageBoundsOneWindow5() will take care of updating - * MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later. - * On Volta, Program for each window that is attached to head. For turing+, - * SetScalingUsageBoundsOneWindow5() will take care of programming window - * usage bounds only for the layers/windows in use. - */ - - setWindowUsageBounds |= - DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE, - GetMaxPixelsFetchedPerLine(pViewPort->in.width, - NV_EVO_SCALE_FACTOR_1X)); - - for (win = 0; win < pDevEvo->numWindows; win++) { - if (head != pDevEvo->headForWindow[win]) { - continue; - } - - nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1); - nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds); - } } static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head, @@ -5310,9 +5312,13 @@ static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head, NVEvoUpdateState *updateState) { NVEvoChannelPtr pChannel = pDevEvo->core; + NvU32 setWindowUsageBounds = + (NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5 | + DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2) | + DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE)); NvU32 verticalUpscalingAllowed = EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort, - pViewPortMax, updateState); + pViewPortMax, updateState, setWindowUsageBounds); nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1); diff --git a/src/nvidia/arch/nvalloc/common/inc/nvcst.h b/src/nvidia/arch/nvalloc/common/inc/nvcst.h index 66a1c2643..a4e1dd4b7 100644 --- a/src/nvidia/arch/nvalloc/common/inc/nvcst.h +++ b/src/nvidia/arch/nvalloc/common/inc/nvcst.h @@ -153,6 +153,7 @@ CSINFO chipsetInfo[] = {PCI_VENDOR_ID_INTEL, 0xA14A, CS_INTEL_A145, "SkyLake C232", 
Intel_A145_setupFunc}, {PCI_VENDOR_ID_INTEL, 0xA14D, CS_INTEL_A145, "SkyLake-H", Intel_A145_setupFunc}, {PCI_VENDOR_ID_INTEL, 0xA244, CS_INTEL_A145, "SkyLake C620", Intel_A145_setupFunc}, + {PCI_VENDOR_ID_INTEL, 0xA1C8, CS_INTEL_A145, "SkyLake C620", Intel_A145_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x8D47, CS_INTEL_8D47, "IntelX99", Intel_8D47_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x8D44, CS_INTEL_8D47, "IntelC612", Intel_8D44_setupFunc}, {PCI_VENDOR_ID_INTEL, 0xA2C5, CS_INTEL_A2C5, "IntelZ270", Intel_A2C5_setupFunc}, diff --git a/src/nvidia/arch/nvalloc/unix/include/nv.h b/src/nvidia/arch/nvalloc/unix/include/nv.h index 96324c7a2..7ec52b80f 100644 --- a/src/nvidia/arch/nvalloc/unix/include/nv.h +++ b/src/nvidia/arch/nvalloc/unix/include/nv.h @@ -622,12 +622,14 @@ typedef enum static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) { return ((offset >= nv->regs->cpu_address) && + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1)))); } static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) { return ((nv->fb) && (offset >= nv->fb->cpu_address) && + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1)))); } @@ -635,6 +637,7 @@ static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) { return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) && (offset >= nv->ud.cpu_address) && + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1)))); } @@ -643,6 +646,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && (offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && + ((offset + (length - 1)) >= offset) && ((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))); } diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c index b832852d4..cde3c4c3e 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osapi.c +++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c @@ -775,7 +775,7 @@ static NV_STATUS RmAccessRegistry( // the passed-in ParmStrLength does not account for '\0' ParmStrLength++; - if (ParmStrLength > NVOS38_MAX_REGISTRY_STRING_LENGTH) + if ((ParmStrLength == 0) || (ParmStrLength > NVOS38_MAX_REGISTRY_STRING_LENGTH)) { RmStatus = NV_ERR_INVALID_STRING_LENGTH; goto done; @@ -788,6 +788,11 @@ static NV_STATUS RmAccessRegistry( RmStatus = NV_ERR_OPERATING_SYSTEM; goto done; } + if (tmpParmStr[ParmStrLength - 1] != '\0') + { + RmStatus = NV_ERR_INVALID_ARGUMENT; + goto done; + } } if ((AccessType == NVOS38_ACCESS_TYPE_READ_BINARY) || @@ -2085,9 +2090,20 @@ static NV_STATUS RmGetAllocPrivate( if (rmStatus != NV_OK) goto done; - endingOffset = pageOffset + length; + if (!portSafeAddU64(pageOffset, length, &endingOffset)) + { + rmStatus = NV_ERR_INVALID_ARGUMENT; + goto done; + } + pageCount = (endingOffset / os_page_size); - pageCount += (*pPageIndex + ((endingOffset % os_page_size) ? 1 : 0)); + + if (!portSafeAddU64(*pPageIndex + ((endingOffset % os_page_size) ? 
1 : 0), + pageCount, &pageCount)) + { + rmStatus = NV_ERR_INVALID_ARGUMENT; + goto done; + } if (pageCount > NV_RM_PAGES_TO_OS_PAGES(pMemDesc->PageCount)) { diff --git a/src/nvidia/generated/g_nv_name_released.h b/src/nvidia/generated/g_nv_name_released.h index dc5ffa1c4..312f61242 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ b/src/nvidia/generated/g_nv_name_released.h @@ -815,6 +815,17 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" }, { 0x20B7, 0x1532, 0x10de, "NVIDIA A30" }, { 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" }, + { 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x179c, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x179d, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x179e, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x179f, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x17a0, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x17a1, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F3, 0x17a2, 0x10de, "NVIDIA A800-SXM4-80GB" }, + { 0x20F5, 0x1799, 0x10de, "NVIDIA A800 80GB PCIe" }, + { 0x20F5, 0x179a, 0x10de, "NVIDIA A800 80GB PCIe LC" }, + { 0x20F6, 0x17a3, 0x10de, "NVIDIA A800 40GB PCIe" }, { 0x2182, 0x0000, 0x0000, "NVIDIA GeForce GTX 1660 Ti" }, { 0x2184, 0x0000, 0x0000, "NVIDIA GeForce GTX 1660" }, { 0x2187, 0x0000, 0x0000, "NVIDIA GeForce GTX 1650 SUPER" }, @@ -849,6 +860,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2203, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090 Ti" }, { 0x2204, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090" }, { 0x2206, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" }, + { 0x2207, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti" }, { 0x2208, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti" }, { 0x220A, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" }, { 0x220D, 0x0000, 0x0000, "NVIDIA CMP 90HX" }, @@ -900,6 +912,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x24B9, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" }, { 0x24BA, 0x0000, 0x0000, 
"NVIDIA RTX A4500 Laptop GPU" }, { 0x24BB, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" }, + { 0x24C9, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" }, { 0x24DC, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Laptop GPU" }, { 0x24DD, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Laptop GPU" }, { 0x24E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti Laptop GPU" }, @@ -915,6 +928,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2531, 0x151d, 0x103c, "NVIDIA RTX A2000" }, { 0x2531, 0x151d, 0x10de, "NVIDIA RTX A2000" }, { 0x2531, 0x151d, 0x17aa, "NVIDIA RTX A2000" }, + { 0x2544, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060" }, { 0x2560, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" }, { 0x2563, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" }, { 0x2571, 0x1611, 0x1028, "NVIDIA RTX A2000 12GB" }, diff --git a/src/nvidia/kernel/inc/vgpu/sdk-structures.h b/src/nvidia/kernel/inc/vgpu/sdk-structures.h index d69982b8f..20daf3529 100644 --- a/src/nvidia/kernel/inc/vgpu/sdk-structures.h +++ b/src/nvidia/kernel/inc/vgpu/sdk-structures.h @@ -123,6 +123,7 @@ typedef struct vmiopd_SM_info { #define NV2080_CTRL_INTERNAL_MAX_TPC_PER_GPC_COUNT_v1C_03 10 #define NV2080_CTRL_INTERNAL_GR_MAX_GPC_v1C_03 12 #define NV2080_CTRL_MC_GET_STATIC_INTR_TABLE_MAX_v1E_09 32 +#define NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL_v1F_0E 72 // Defined this intermediate RM-RPC structure for making RPC call from Guest as // we have the restriction of passing max 4kb of data to plugin and the diff --git a/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c b/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c index 83a830168..38e383aac 100644 --- a/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c +++ b/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c @@ -448,6 +448,7 @@ nvdDumpDebugBuffers_IMPL NvP64 pUmdBuffer = NvP64_NULL; NvP64 priv = NvP64_NULL; NvU32 bufSize = 0; + NvU8 *dataBuffer = NULL; status = prbEncNestedStart(pPrbEnc, NVDEBUG_NVDUMP_DCL_MSG); if (status != NV_OK) @@ -463,7 
+464,20 @@ nvdDumpDebugBuffers_IMPL if (status != NV_OK) break; - status = prbAppendSubMsg(pPrbEnc, pCurrent->tag, NvP64_VALUE(pUmdBuffer), bufSize); + dataBuffer = (NvU8 *) portMemAllocStackOrHeap(bufSize); + if (dataBuffer == NULL) + { + status = NV_ERR_NO_MEMORY; + break; + } + + // Copy UmdBuffer to prevent data races + portMemCopy(dataBuffer, bufSize, pUmdBuffer, bufSize); + portAtomicMemoryFenceFull(); + + status = prbAppendSubMsg(pPrbEnc, pCurrent->tag, dataBuffer, bufSize); + + portMemFreeStackOrHeap(dataBuffer); // Unmap DebugBuffer address memdescUnmap(pCurrent->pMemDesc, NV_TRUE, // Kernel mapping? @@ -522,6 +536,24 @@ prbAppendSubMsg header = (NVDUMP_SUB_ALLOC_HEADER *)pCurrent; subAlloc = pCurrent + sizeof(NVDUMP_SUB_ALLOC_HEADER); + // Check for out-of-bounds buffer access + if (pCurrent < buffer || subAlloc > (buffer + size)) + { + status = NV_ERR_INVALID_ARGUMENT; + goto done; + } + + if (!portSafeSubU16(header->end, header->start, (NvU16 *) &subMsgLen)) + { + status = NV_ERR_INVALID_ARGUMENT; + goto done; + } + + if ((subAlloc + subMsgLen) >= (buffer + size)) + { + status = NV_ERR_INSUFFICIENT_RESOURCES; + goto done; + } // If valid, copy contents if (header->flags & NVDUMP_SUB_ALLOC_VALID) { diff --git a/src/nvidia/src/kernel/gpu/bus/kern_bus_vbar2.c b/src/nvidia/src/kernel/gpu/bus/kern_bus_vbar2.c index 0b98044a3..c4b12786c 100644 --- a/src/nvidia/src/kernel/gpu/bus/kern_bus_vbar2.c +++ b/src/nvidia/src/kernel/gpu/bus/kern_bus_vbar2.c @@ -862,6 +862,11 @@ kbusMapBar2Aperture_SCRATCH NvU32 flags ) { + if (pMemDesc->Size >= NV_U32_MAX) + { + return NULL; + } + return portMemAllocNonPaged((NvU32)pMemDesc->Size); } diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c index 8e0b03ceb..7ea5c4e65 100644 --- a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c +++ b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c @@ -1075,6 +1075,7 @@ kchannelMap_IMPL RmClient *pRmClient = dynamicCast(pRsClient, 
RmClient); GpuResource *pGpuResource; + NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_OBJECT); NV_ASSERT_OR_RETURN(!pKernelChannel->bClientAllocatedUserD, NV_ERR_INVALID_REQUEST); rmStatus = gpuresGetByDeviceOrSubdeviceHandle(pRsClient, @@ -3759,7 +3760,12 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL NV_CHECK_OR_RETURN(LEVEL_INFO, index != NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR, NV_ERR_INVALID_ARGUMENT); - notificationBufferSize = (index + 1) * sizeof(NvNotification); + // Check for integer overflows + if (((index + 1) < index) || + !portSafeMulU64(index + 1, sizeof(NvNotification), &notificationBufferSize)) + { + return NV_ERR_OUT_OF_RANGE; + } status = deviceGetByInstance(pClient, gpuGetDeviceInstance(pGpu), &pDevice); if (status != NV_OK) diff --git a/src/nvidia/src/kernel/gpu/gr/kernel_sm_debugger_session_ctrl.c b/src/nvidia/src/kernel/gpu/gr/kernel_sm_debugger_session_ctrl.c index e31aee377..a01d76041 100644 --- a/src/nvidia/src/kernel/gpu/gr/kernel_sm_debugger_session_ctrl.c +++ b/src/nvidia/src/kernel/gpu/gr/kernel_sm_debugger_session_ctrl.c @@ -229,7 +229,8 @@ _nv8deCtrlCmdReadWriteSurface } else if (traceArg.aperture == ADDR_FBMEM) { - memdescCreate(&pMemDesc, pGpu, curSize, 0, NV_TRUE, traceArg.aperture, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE); + NV_ASSERT_OK_OR_RETURN(memdescCreate(&pMemDesc, pGpu, curSize, 0, NV_TRUE, + traceArg.aperture, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE)); memdescDescribe(pMemDesc, traceArg.aperture, traceArg.pa, curSize); } @@ -684,6 +685,10 @@ NV_STATUS ksmdbgssnCtrlCmdDebugExecRegOps_IMPL NV_STATUS status = NV_OK; NvBool isClientGspPlugin = NV_FALSE; + NV_CHECK_OR_RETURN(LEVEL_ERROR, + pParams->regOpCount <= NV83DE_CTRL_GPU_EXEC_REG_OPS_MAX_OPS, + NV_ERR_INVALID_ARGUMENT); + // Check if User have permission to access register offset NV_CHECK_OK_OR_RETURN(LEVEL_INFO, gpuValidateRegOps(pGpu, pParams->regOps, pParams->regOpCount, @@ -725,9 +730,11 @@ ksmdbgssnCtrlCmdDebugReadBatchMemory_IMPL { NV_STATUS localStatus 
= NV_OK; NvP64 pData = (NvP64)(((NvU8 *)pParams->pData) + pParams->entries[i].dataOffset); + NvU32 endingOffset; NV_CHECK_OR_ELSE(LEVEL_ERROR, - pParams->entries[i].dataOffset < pParams->dataLength, + portSafeAddU32(pParams->entries[i].dataOffset, pParams->entries[i].length, &endingOffset) && + (endingOffset <= pParams->dataLength), localStatus = NV_ERR_INVALID_OFFSET; goto updateStatus; ); @@ -762,13 +769,18 @@ ksmdbgssnCtrlCmdDebugWriteBatchMemory_IMPL NV_STATUS status = NV_OK; NvU32 i; + NV_CHECK_OR_RETURN(LEVEL_ERROR, pParams->count <= MAX_ACCESS_MEMORY_OPS, + NV_ERR_INVALID_ARGUMENT); + for (i = 0; i < pParams->count; ++i) { NV_STATUS localStatus = NV_OK; NvP64 pData = (NvP64)(((NvU8 *)pParams->pData) + pParams->entries[i].dataOffset); + NvU32 endingOffset; NV_CHECK_OR_ELSE(LEVEL_ERROR, - (pParams->entries[i].dataOffset + pParams->entries[i].length) <= pParams->dataLength, + portSafeAddU32(pParams->entries[i].dataOffset, pParams->entries[i].length, &endingOffset) && + (endingOffset <= pParams->dataLength), localStatus = NV_ERR_INVALID_OFFSET; goto updateStatus; ); diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c index d20038188..b8405293b 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c @@ -229,6 +229,10 @@ memdescCreate if (pMemoryManager && pMemoryManager->sysmemPageSize) { allocSize = RM_ALIGN_UP(allocSize, pMemoryManager->sysmemPageSize); + if (allocSize < Size) + { + return NV_ERR_INVALID_ARGUMENT; + } } } @@ -253,7 +257,10 @@ memdescCreate if ((AddressSpace == ADDR_SYSMEM || AddressSpace == ADDR_UNKNOWN) && PhysicallyContiguous && (Alignment > RM_PAGE_SIZE)) { - allocSize += (Alignment - RM_PAGE_SIZE); + if (!portSafeAddU64(allocSize, (Alignment - RM_PAGE_SIZE), &allocSize)) + { + return NV_ERR_INVALID_ARGUMENT; + } } } } diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c index 
a2c742b58..a1981b919 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c @@ -1970,7 +1970,10 @@ memmgrFillComprInfo_IMPL NV_ASSERT(compTagStartOffset != ~(NvU32)0); - size = pageSize * pageCount; + if (!portSafeMulU32(pageSize, pageCount, &size)) + { + return NV_ERR_INVALID_ARGUMENT; + } pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift; pComprInfo->compTagLineMin = compTagStartOffset; diff --git a/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c b/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c index a3cf631d8..54661aa16 100644 --- a/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c +++ b/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c @@ -89,6 +89,7 @@ kgmmuValidateFabricBaseAddress_GA100 OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu); MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); NvU64 fbSizeBytes; + NvU64 fbUpperLimit; fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20; @@ -106,9 +107,14 @@ kgmmuValidateFabricBaseAddress_GA100 // Align fbSize to mapslot size. fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36)); + // Check for integer overflow + if (!portSafeAddU64(fabricBaseAddr, fbSizeBytes, &fbUpperLimit)) + { + return NV_ERR_INVALID_ARGUMENT; + } // Make sure the address range doesn't go beyond the limit, (2K * 64GB). 
- if ((fabricBaseAddr + fbSizeBytes) > NVBIT64(47)) + if (fbUpperLimit > NVBIT64(47)) { return NV_ERR_INVALID_ARGUMENT; } diff --git a/src/nvidia/src/kernel/gpu/nvlink/arch/ampere/kernel_nvlink_ga100.c b/src/nvidia/src/kernel/gpu/nvlink/arch/ampere/kernel_nvlink_ga100.c index e706aaaa7..388b271ed 100644 --- a/src/nvidia/src/kernel/gpu/nvlink/arch/ampere/kernel_nvlink_ga100.c +++ b/src/nvidia/src/kernel/gpu/nvlink/arch/ampere/kernel_nvlink_ga100.c @@ -184,6 +184,7 @@ knvlinkValidateFabricBaseAddress_GA100 { MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); NvU64 fbSizeBytes; + NvU64 fbUpperLimit; fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20; @@ -202,8 +203,14 @@ knvlinkValidateFabricBaseAddress_GA100 // Align fbSize to mapslot size. fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36)); + // Check for integer overflow + if (!portSafeAddU64(fabricBaseAddr, fbSizeBytes, &fbUpperLimit)) + { + return NV_ERR_INVALID_ARGUMENT; + } + // Make sure the address range doesn't go beyond the limit, (2K * 64GB). 
- if ((fabricBaseAddr + fbSizeBytes) > NVBIT64(47)) + if (fbUpperLimit > NVBIT64(47)) { return NV_ERR_INVALID_ARGUMENT; } diff --git a/src/nvidia/src/kernel/gpu/timer/timer.c b/src/nvidia/src/kernel/gpu/timer/timer.c index 341e224ec..22ee9c603 100644 --- a/src/nvidia/src/kernel/gpu/timer/timer.c +++ b/src/nvidia/src/kernel/gpu/timer/timer.c @@ -560,7 +560,7 @@ NV_STATUS tmrEventScheduleRel_IMPL } else { - AbsTime = currentTime + RelTime; + NV_CHECK_OR_RETURN(LEVEL_ERROR, portSafeAddU64(currentTime, RelTime, &AbsTime), NV_ERR_INVALID_ARGUMENT); } return tmrEventScheduleAbs(pTmr, pEvent, AbsTime); @@ -588,7 +588,7 @@ NV_STATUS tmrScheduleCallbackRel_IMPL if (rmStatus != NV_OK) return rmStatus; - AbsTime = currentTime + RelTime; + NV_CHECK_OR_RETURN(LEVEL_ERROR, portSafeAddU64(currentTime, RelTime, &AbsTime), NV_ERR_INVALID_ARGUMENT); return tmrScheduleCallbackAbs(pTmr, Proc, Object, AbsTime, Flags, ChId); } diff --git a/src/nvidia/src/kernel/rmapi/mapping_cpu.c b/src/nvidia/src/kernel/rmapi/mapping_cpu.c index 150414847..3d186746b 100644 --- a/src/nvidia/src/kernel/rmapi/mapping_cpu.c +++ b/src/nvidia/src/kernel/rmapi/mapping_cpu.c @@ -181,6 +181,8 @@ memMap_IMPL NvBool bBroadcast; NvU64 mapLimit; NvBool bIsSysmem = NV_FALSE; + NvBool bSkipSizeCheck = (DRF_VAL(OS33, _FLAGS, _SKIP_SIZE_CHECK, pMapParams->flags) == + NVOS33_FLAGS_SKIP_SIZE_CHECK_ENABLE); NV_ASSERT_OR_RETURN(RMCFG_FEATURE_KERNEL_RM, NV_ERR_NOT_SUPPORTED); @@ -242,14 +244,18 @@ memMap_IMPL return NV_ERR_INVALID_LIMIT; } + if (bSkipSizeCheck && (pCallContext->secInfo.privLevel < RS_PRIV_LEVEL_KERNEL)) + { + return NV_ERR_INSUFFICIENT_PERMISSIONS; + } + // // See bug #140807 and #150889 - we need to pad memory mappings to past their // actual allocation size (to PAGE_SIZE+1) because of a buggy ms function so // skip the allocation size sanity check so the map operation still succeeds. 
// - if ((DRF_VAL(OS33, _FLAGS, _SKIP_SIZE_CHECK, pMapParams->flags) == NVOS33_FLAGS_SKIP_SIZE_CHECK_DISABLE) && - (!portSafeAddU64(pMapParams->offset, pMapParams->length, &mapLimit) || - (mapLimit > pMemoryInfo->Length))) + if (!portSafeAddU64(pMapParams->offset, pMapParams->length, &mapLimit) || + (!bSkipSizeCheck && (mapLimit > pMemoryInfo->Length))) { return NV_ERR_INVALID_LIMIT; } diff --git a/version.mk b/version.mk index f3d119595..b848fca69 100644 --- a/version.mk +++ b/version.mk @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 515.76 +NVIDIA_VERSION = 515.86.01 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))