diff --git a/README.md b/README.md index 429bd662e..89be024ee 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 575.51.03. +version 575.57.08. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -575.51.03 driver release. This can be achieved by installing +575.57.08 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -185,7 +185,7 @@ table below). For details on feature support and limitations, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/575.51.03/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/575.57.08/README/kernel_open.html For vGPU support, please refer to the README.vgpu packaged in the vGPU Host Package for more details. @@ -955,16 +955,27 @@ Subsystem Device ID. | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 103C 204B | | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B | | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B | +| NVIDIA RTX PRO 5000 Blackwell | 2BB3 1028 204D | +| NVIDIA RTX PRO 5000 Blackwell | 2BB3 103C 204D | +| NVIDIA RTX PRO 5000 Blackwell | 2BB3 10DE 204D | +| NVIDIA RTX PRO 5000 Blackwell | 2BB3 17AA 204D | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 17AA 204C | +| NVIDIA RTX PRO 6000 Blackwell Server Edition | 2BB5 10DE 204E | | NVIDIA GeForce RTX 5080 | 2C02 | | NVIDIA GeForce RTX 5070 Ti | 2C05 | | NVIDIA GeForce RTX 5090 Laptop GPU | 2C18 | | NVIDIA GeForce RTX 5080 Laptop GPU | 2C19 | | NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 | | NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 | +| NVIDIA GeForce RTX 5060 Ti | 2D04 | +| NVIDIA GeForce RTX 5060 | 2D05 | +| NVIDIA GeForce RTX 5070 Laptop GPU | 2D18 | +| NVIDIA GeForce RTX 5060 Laptop GPU | 2D19 | +| NVIDIA GeForce RTX 5070 Laptop GPU | 2D58 | +| NVIDIA GeForce RTX 5060 Laptop GPU | 2D59 | | NVIDIA GeForce RTX 5070 | 2F04 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 1b24ea12f..90fcc4ed6 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -75,21 +75,14 @@ $(foreach _module, $(NV_KERNEL_MODULES), \ $(eval include $(src)/$(_module)/$(_module).Kbuild)) -# -# Define CFLAGS that apply to all the NVIDIA kernel modules. EXTRA_CFLAGS -# is deprecated since 2.6.24 in favor of ccflags-y, but we need to support -# older kernels which do not have ccflags-y. Newer kernels append -# $(EXTRA_CFLAGS) to ccflags-y for compatibility. 
-# - -EXTRA_CFLAGS += -I$(src)/common/inc -EXTRA_CFLAGS += -I$(src) -EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args -EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -EXTRA_CFLAGS += -DNV_VERSION_STRING=\"575.51.03\" +ccflags-y += -I$(src)/common/inc +ccflags-y += -I$(src) +ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args +ccflags-y += -D__KERNEL__ -DMODULE -DNVRM +ccflags-y += -DNV_VERSION_STRING=\"575.57.08\" ifneq ($(SYSSRCHOST1X),) - EXTRA_CFLAGS += -I$(SYSSRCHOST1X) + ccflags-y += -I$(SYSSRCHOST1X) endif # Some Android kernels prohibit driver use of filesystem functions like @@ -99,57 +92,57 @@ endif PLATFORM_IS_ANDROID ?= 0 ifeq ($(PLATFORM_IS_ANDROID),1) - EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0 + ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0 else - EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1 + ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1 endif -EXTRA_CFLAGS += -Wno-unused-function +ccflags-y += -Wno-unused-function ifneq ($(NV_BUILD_TYPE),debug) - EXTRA_CFLAGS += -Wuninitialized + ccflags-y += -Wuninitialized endif -EXTRA_CFLAGS += -fno-strict-aliasing +ccflags-y += -fno-strict-aliasing ifeq ($(ARCH),arm64) - EXTRA_CFLAGS += -mstrict-align + ccflags-y += -mstrict-align endif ifeq ($(NV_BUILD_TYPE),debug) - EXTRA_CFLAGS += -g + ccflags-y += -g endif -EXTRA_CFLAGS += -ffreestanding +ccflags-y += -ffreestanding ifeq ($(ARCH),arm64) - EXTRA_CFLAGS += -mgeneral-regs-only -march=armv8-a - EXTRA_CFLAGS += $(call cc-option,-mno-outline-atomics,) + ccflags-y += -mgeneral-regs-only -march=armv8-a + ccflags-y += $(call cc-option,-mno-outline-atomics,) endif ifeq ($(ARCH),x86_64) - EXTRA_CFLAGS += -mno-red-zone -mcmodel=kernel + ccflags-y += -mno-red-zone -mcmodel=kernel endif ifeq ($(ARCH),powerpc) - EXTRA_CFLAGS += -mlittle-endian -mno-strict-align + ccflags-y += -mlittle-endian -mno-strict-align endif -EXTRA_CFLAGS += -DNV_UVM_ENABLE -EXTRA_CFLAGS += $(call cc-option,-Werror=undef,) -EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2) -EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER +ccflags-y += -DNV_UVM_ENABLE +ccflags-y += $(call cc-option,-Werror=undef,) +ccflags-y += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2) +ccflags-y += -DNV_KERNEL_INTERFACE_LAYER # # Detect SGI UV systems and apply system-specific optimizations. # ifneq ($(wildcard /proc/sgi_uv),) - EXTRA_CFLAGS += -DNV_CONFIG_X86_UV + ccflags-y += -DNV_CONFIG_X86_UV endif ifdef VGX_FORCE_VFIO_PCI_CORE - EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE + ccflags-y += -DNV_VGPU_FORCE_VFIO_PCI_CORE endif WARNINGS_AS_ERRORS ?= @@ -183,7 +176,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \ NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags) -NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie +NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(ccflags-y) -fno-pie +NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS)) NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign) NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,) NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,) diff --git a/kernel-open/common/inc/nv-timer.h b/kernel-open/common/inc/nv-timer.h index 6af49fb67..fc8def0c1 100644 --- a/kernel-open/common/inc/nv-timer.h +++ b/kernel-open/common/inc/nv-timer.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -63,4 +63,13 @@ static inline void nv_timer_setup(struct nv_timer *nv_timer,
 #endif
 }
 
+static inline void nv_timer_delete_sync(struct timer_list *timer)
+{
+#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
+    timer_delete_sync(timer);
+#else
+    del_timer_sync(timer);
+#endif
+}
+
 #endif // __NV_TIMER_H__
diff --git a/kernel-open/common/inc/nv.h b/kernel-open/common/inc/nv.h
index 507749e44..9d82f9619 100644
--- a/kernel-open/common/inc/nv.h
+++ b/kernel-open/common/inc/nv.h
@@ -961,7 +961,6 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
 void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
 void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
 NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
-NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId);
 NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
 NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
 NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh
index 40b7ce84f..796e892d9 100755
--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
@@ -3149,6 +3149,21 @@ compile_test() {
             compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
         ;;
 
+        has_enum_pidtype_tgid)
+            # Determine if PIDTYPE_TGID is present in the kernel as an enum
+            #
+            # Added by commit 6883f81aac6f ("pid: Implement PIDTYPE_TGID")
+            # in v4.19
+            #
+            CODE="
+            #include <linux/pid.h>
+
+            enum pid_type type = PIDTYPE_TGID;
+            "
+
+            compile_check_conftest "$CODE" "NV_HAS_ENUM_PIDTYPE_TGID" "" "types"
+        ;;
+
         vfio_pin_pages_has_vfio_device_arg)
             #
             # Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@@ -7579,6 +7594,22 @@ compile_test() {
             compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
         ;;
 
+        page_pgmap)
+            #
+            # Determine if the page_pgmap() function is present.
+            #
+            # Added by commit 82ba975e4c43 ("mm: allow compound zone device
+            # pages") in v6.14
+            #
+            CODE="
+            #include <linux/memremap.h>
+            int conftest_page_pgmap(void) {
+                return page_pgmap(NULL);
+            }"
+
+            compile_check_conftest "$CODE" "NV_PAGE_PGMAP_PRESENT" "" "functions"
+        ;;
+
         folio_test_swapcache)
             #
             # Determine if the folio_test_swapcache() function is present.
diff --git a/kernel-open/nvidia-drm/nvidia-drm-fence.c b/kernel-open/nvidia-drm/nvidia-drm-fence.c
index 81db734d9..3b243eef2 100644
--- a/kernel-open/nvidia-drm/nvidia-drm-fence.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-fence.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2025, NVIDIA CORPORATION. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -1132,7 +1132,7 @@ static void __nv_drm_semsurf_fence_ctx_destroy( */ nv_drm_workthread_shutdown(&ctx->worker); - nv_drm_del_timer_sync(&ctx->timer); + nv_timer_delete_sync(&ctx->timer.kernel_timer); /* * The semaphore surface could still be sending callbacks, so it is still diff --git a/kernel-open/nvidia-drm/nvidia-drm-os-interface.c b/kernel-open/nvidia-drm/nvidia-drm-os-interface.c index 7617476d6..c7eb797c6 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-os-interface.c +++ b/kernel-open/nvidia-drm/nvidia-drm-os-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -236,15 +236,6 @@ unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms) return jiffies + msecs_to_jiffies(relative_timeout_ms); } -bool nv_drm_del_timer_sync(nv_drm_timer *timer) -{ - if (del_timer_sync(&timer->kernel_timer)) { - return true; - } else { - return false; - } -} - #if defined(NV_DRM_FENCE_AVAILABLE) int nv_drm_create_sync_file(nv_dma_fence_t *fence) { diff --git a/kernel-open/nvidia-drm/nvidia-drm-os-interface.h b/kernel-open/nvidia-drm/nvidia-drm-os-interface.h index 0e4960766..e4a9aa05b 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-os-interface.h +++ b/kernel-open/nvidia-drm/nvidia-drm-os-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -101,8 +101,6 @@ void nv_drm_timer_setup(nv_drm_timer *timer, void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long relative_timeout_ms); -bool nv_drm_del_timer_sync(nv_drm_timer *timer); - unsigned long nv_drm_timer_now(void); unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms); diff --git a/kernel-open/nvidia-drm/nvidia-drm-sources.mk b/kernel-open/nvidia-drm/nvidia-drm-sources.mk index 9c0c6034f..6365cf5d6 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-sources.mk +++ b/kernel-open/nvidia-drm/nvidia-drm-sources.mk @@ -37,6 +37,7 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd +NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c index 29fbf0160..55b40a962 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c +++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2015-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -53,6 +53,7 @@ #include "nv-procfs.h" #include "nv-kthread-q.h" #include "nv-time.h" +#include "nv-timer.h" #include "nv-lock.h" #include "nv-chardev-numbers.h" @@ -748,7 +749,7 @@ static void nvkms_kthread_q_callback(void *arg) * pending timers and than waiting for workqueue callbacks. */ if (timer->kernel_timer_created) { - del_timer_sync(&timer->kernel_timer); + nv_timer_delete_sync(&timer->kernel_timer); } /* @@ -1932,7 +1933,11 @@ restart: * completion, and we wait for queue completion with * nv_kthread_q_stop below. */ +#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync + if (timer_delete_sync(&timer->kernel_timer) == 1) { +#else if (del_timer_sync(&timer->kernel_timer) == 1) { +#endif /* We've deactivated timer so we need to clean after it */ list_del(&timer->timers_list); diff --git a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild index c170c6aa7..452a7c4ef 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild +++ b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild @@ -103,4 +103,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64 NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg diff --git a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild index f5b4ec8ef..3cb3e9980 100644 --- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild +++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild @@ -62,6 +62,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache +NV_CONFTEST_FUNCTION_COMPILE_TESTS += page_pgmap NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range @@ -80,3 +81,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___iowrite64_lo_hi +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_make_device_exclusive diff --git a/kernel-open/nvidia-uvm/uvm_ats_sva.c b/kernel-open/nvidia-uvm/uvm_ats_sva.c index a1256f265..f33fab037 100644 --- a/kernel-open/nvidia-uvm/uvm_ats_sva.c +++ b/kernel-open/nvidia-uvm/uvm_ats_sva.c @@ -139,7 +139,11 @@ static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg) static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val) { +#if NV_IS_EXPORT_SYMBOL_PRESENT___iowrite64_lo_hi + __iowrite64_lo_hi(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg); +#else iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg); +#endif } // Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU diff --git a/kernel-open/nvidia-uvm/uvm_blackwell_host.c b/kernel-open/nvidia-uvm/uvm_blackwell_host.c index a95a76f6c..130b73ecd 100644 --- a/kernel-open/nvidia-uvm/uvm_blackwell_host.c +++ 
b/kernel-open/nvidia-uvm/uvm_blackwell_host.c @@ -254,3 +254,31 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push, HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi)); } } + +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + if (parent_gpu->rm_info.accessCntrBufferCount > 1) { + NvU32 i; + + for (i = 0; i < num_entries; i++) { + const uvm_access_counter_buffer_entry_t *entry = buffer_entries[i]; + + // The LSb identifies the die ID. + if ((entry->tag & 0x1) == 1) + return UVM_ACCESS_COUNTER_CLEAR_OP_ALL; + } + } + + return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; +} + +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; +} diff --git a/kernel-open/nvidia-uvm/uvm_gpu.c b/kernel-open/nvidia-uvm/uvm_gpu.c index 9167e5aa9..991239c25 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_gpu.c @@ -1194,6 +1194,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid, uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR); uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR); uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS); + uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS); + uvm_tracker_init(&parent_gpu->access_counters_clear_tracker); uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF); uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF); uvm_rb_tree_init(&parent_gpu->instance_ptr_table); @@ -1211,6 +1213,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid, return NV_OK; cleanup: + uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker); uvm_kvfree(parent_gpu); return status; @@ -1656,19 +1659,12 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu, // Sync the access counter clear tracker too. 
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) { - NvU32 notif_buf_index; - for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) { - uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index]; + uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock); + status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker); + uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock); - if (access_counters->rm_info.accessCntrBufferHandle != 0) { - uvm_access_counters_isr_lock(access_counters); - status = uvm_tracker_wait(&access_counters->clear_tracker); - uvm_access_counters_isr_unlock(access_counters); - - if (status != NV_OK) - UVM_ASSERT(status == uvm_global_get_status()); - } - } + if (status != NV_OK) + UVM_ASSERT(status == uvm_global_get_status()); } } @@ -1801,6 +1797,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref) for_each_sub_processor_index(sub_processor_index) UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]); + uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker); + uvm_kvfree(parent_gpu); } @@ -2895,6 +2893,10 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid, if (status != NV_OK) goto error_unregister; + // TODO: Bug 5262806: Remove this WAR once the bug is fixed. + if (gpu_info->accessCntrBufferCount > 1) + gpu_info->accessCntrBufferCount = 1; + if (parent_gpu != NULL) { // If the UUID has been seen before, and if SMC is enabled, then check // if this specific partition has been seen previously. The UUID-based diff --git a/kernel-open/nvidia-uvm/uvm_gpu.h b/kernel-open/nvidia-uvm/uvm_gpu.h index ba0bf7115..d83f5d544 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu.h +++ b/kernel-open/nvidia-uvm/uvm_gpu.h @@ -522,10 +522,6 @@ struct uvm_access_counter_buffer_struct // PCIe NvU32 cached_put; - // Tracker used to aggregate access counters clear operations, needed for - // GPU removal - uvm_tracker_t clear_tracker; - // Current access counter configuration. During normal operation this // information is computed once during GPU initialization. However, tests // may override it to try different configuration values. @@ -1213,6 +1209,11 @@ struct uvm_parent_gpu_struct uvm_access_counter_buffer_t *access_counter_buffer; uvm_mutex_t access_counters_enablement_lock; + // Tracker used to aggregate access counters clear operations, needed for + // GPU removal. It is only used when supports_access_counters is set. + uvm_tracker_t access_counters_clear_tracker; + uvm_mutex_t access_counters_clear_tracker_lock; + // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs. NvU32 utlb_per_gpc_count; diff --git a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c index 30470ac09..f815c83b1 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c @@ -217,38 +217,19 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran return NV_OK; } -// Clear the access counter notifications and add it to the per-GPU -// per-notification-buffer clear tracker. 
-static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu, - uvm_access_counter_buffer_t *access_counters, - uvm_access_counter_buffer_entry_t **notification_start, - NvU32 num_notifications) +static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu) { - NvU32 i; NV_STATUS status; - uvm_push_t push; - status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch"); - if (status != NV_OK) { - UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n", - nvstatusToString(status), - uvm_gpu_name(gpu), - access_counters->index); - return status; - } + uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock); + status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker); + uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock); - for (i = 0; i < num_notifications; i++) - gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]); - - uvm_push_end(&push); - - uvm_tracker_remove_completed(&access_counters->clear_tracker); - - return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push); + return status; } -// Clear all access counters and add the operation to the per-GPU -// per-notification-buffer clear tracker +// Clear all access counters and add the operation to the per-GPU clear +// tracker. static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buffer_t *access_counters) { NV_STATUS status; @@ -270,8 +251,52 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf uvm_push_end(&push); - uvm_tracker_remove_completed(&access_counters->clear_tracker); - return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push); + uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock); + uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker); + status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push); + uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock); + + return status; +} + +// Clear the access counter notifications and add it to the per-GPU clear +// tracker. 
+static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu, + uvm_access_counter_buffer_t *access_counters, + uvm_access_counter_buffer_entry_t **notification_start, + NvU32 num_notifications) +{ + NvU32 i; + NV_STATUS status; + uvm_push_t push; + uvm_access_counter_clear_op_t clear_op; + + clear_op = gpu->parent->host_hal->access_counter_query_clear_op(gpu->parent, notification_start, num_notifications); + if (clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_ALL) + return access_counter_clear_all(gpu, access_counters); + + UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED); + + status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch"); + if (status != NV_OK) { + UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n", + nvstatusToString(status), + uvm_gpu_name(gpu), + access_counters->index); + return status; + } + + for (i = 0; i < num_notifications; i++) + gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]); + + uvm_push_end(&push); + + uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock); + uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker); + status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push); + uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock); + + return status; } bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index) @@ -374,8 +399,6 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3 access_counters->notifications_ignored_count = 0; access_counters->test.reconfiguration_owner = NULL; - uvm_tracker_init(&access_counters->clear_tracker); - access_counters->max_notifications = access_counters->rm_info.bufferSize / parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu); @@ -443,8 +466,6 @@ void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 n UVM_ASSERT(status == NV_OK); access_counters->rm_info.accessCntrBufferHandle = 0; - uvm_tracker_deinit(&access_counters->clear_tracker); - uvm_kvfree(batch_context->notification_cache); uvm_kvfree(batch_context->notifications); batch_context->notification_cache = NULL; @@ -488,7 +509,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, NvU32 index, con if (status != NV_OK) goto error; - status = uvm_tracker_wait(&access_counters->clear_tracker); + status = parent_gpu_clear_tracker_wait(gpu->parent); if (status != NV_OK) goto error; @@ -522,7 +543,7 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32 UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[index].service_lock)); // Wait for any pending clear operation before releasing ownership - status = uvm_tracker_wait(&access_counters->clear_tracker); + status = parent_gpu_clear_tracker_wait(parent_gpu); if (status != NV_OK) UVM_ASSERT(status == uvm_global_get_status()); @@ -1751,28 +1772,21 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS uvm_va_space_up_read(va_space); for_each_gpu_in_mask(gpu, retained_gpus) { - NvU32 notif_buf_index; + uvm_access_counter_buffer_t *access_counters; if (!gpu->parent->access_counters_supported) continue; - for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) { - uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, - notif_buf_index); - 
-            uvm_access_counters_isr_lock(access_counters);
+        // clear_all affects all the notification buffers, so we issue it
+        // for notif_buf_index 0.
+        access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, 0);
+        status = access_counter_clear_all(gpu, access_counters);
+        if (status == NV_OK)
+            status = parent_gpu_clear_tracker_wait(gpu->parent);
 
-            // Access counters are not enabled. Nothing to clear.
-            if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count) {
-                status = access_counter_clear_all(gpu, access_counters);
-                if (status == NV_OK)
-                    status = uvm_tracker_wait(&access_counters->clear_tracker);
-            }
-
-            uvm_access_counters_isr_unlock(access_counters);
-
-            if (status != NV_OK)
-                break;
-        }
+        // Break the loop if clear_all failed in any of the retained GPUs.
+        if (status != NV_OK)
+            break;
     }
 
     for_each_gpu_in_mask(gpu, retained_gpus)
@@ -2055,7 +2069,9 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
     NV_STATUS status = NV_OK;
     uvm_gpu_t *gpu = NULL;
     uvm_va_space_t *va_space = uvm_va_space_get(filp);
+    uvm_access_counter_buffer_t *access_counters;
     NvU32 notif_buf_index;
+    NvBool index0_state;
 
     if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
         return NV_ERR_INVALID_ARGUMENT;
@@ -2069,51 +2085,52 @@
         goto exit_release_gpu;
     }
 
-    for (notif_buf_index = 0;
-         notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount && status == NV_OK;
-         notif_buf_index++) {
-        uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
-                                                                                            notif_buf_index);
+    uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
 
-        uvm_access_counters_isr_lock(access_counters);
+    // Access counters not enabled. Nothing to reset
+    if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
+        uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
+        goto exit_release_gpu;
+    }
 
-        // Access counters not enabled. Nothing to reset
-        if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0)
-            goto exit_isr_unlock;
+    uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
 
-        if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
-            status = access_counter_clear_all(gpu, access_counters);
-        }
-        else {
-            uvm_access_counter_buffer_entry_t entry = { 0 };
-            uvm_access_counter_buffer_entry_t *notification = &entry;
+    // Clear operations affect all notification buffers, so we use
+    // notif_buf_index = 0.
+    notif_buf_index = 0;
+    access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, notif_buf_index);
 
-            entry.bank = params->bank;
-            entry.tag = params->tag;
+    uvm_access_counters_isr_lock(access_counters);
 
-            status = access_counter_clear_notifications(gpu, access_counters, &notification, 1);
-        }
+    // Recheck that access counters are enabled.
+ index0_state = gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0; + if (index0_state) { + NvU32 i; - if (status == NV_OK) - status = uvm_tracker_wait(&access_counters->clear_tracker); + for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++) + UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state); + + goto exit_isr_unlock; + } + + if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) { + status = access_counter_clear_all(gpu, access_counters); + } + else { + uvm_access_counter_buffer_entry_t entry = { 0 }; + uvm_access_counter_buffer_entry_t *notification = &entry; + + entry.bank = params->bank; + entry.tag = params->tag; + + status = access_counter_clear_notifications(gpu, access_counters, ¬ification, 1); + } + + if (status == NV_OK) + status = parent_gpu_clear_tracker_wait(gpu->parent); exit_isr_unlock: - uvm_access_counters_isr_unlock(access_counters); - - // We only need to clear_all() once. - if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) { - NvU32 i; - - // Early exit of the main loop; since we only need to clear_all() - // once. Check that all the remaining notification buffers have - // access counters in same state. - NvBool index0_state = (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0); - for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++) - UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state); - - break; - } - } + uvm_access_counters_isr_unlock(access_counters); exit_release_gpu: uvm_gpu_release(gpu); diff --git a/kernel-open/nvidia-uvm/uvm_hal.c b/kernel-open/nvidia-uvm/uvm_hal.c index 5857a697f..662031ac2 100644 --- a/kernel-open/nvidia-uvm/uvm_hal.c +++ b/kernel-open/nvidia-uvm/uvm_hal.c @@ -218,6 +218,7 @@ static uvm_hal_class_ops_t host_table[] = .clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported, .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported, .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported, + .access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported, .get_time = uvm_hal_maxwell_get_time, } }, @@ -269,6 +270,7 @@ static uvm_hal_class_ops_t host_table[] = .tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test, .access_counter_clear_all = uvm_hal_turing_access_counter_clear_all, .access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted, + .access_counter_query_clear_op = uvm_hal_turing_access_counter_query_clear_op, } }, { @@ -308,12 +310,15 @@ static uvm_hal_class_ops_t host_table[] = .tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all, .tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va, .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test, + .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100, } }, { .id = BLACKWELL_CHANNEL_GPFIFO_B, .parent_id = BLACKWELL_CHANNEL_GPFIFO_A, - .u.host_ops = {} + .u.host_ops = { + .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb20x + } }, }; diff --git a/kernel-open/nvidia-uvm/uvm_hal.h b/kernel-open/nvidia-uvm/uvm_hal.h index d8edee20a..5970b1c11 100644 --- a/kernel-open/nvidia-uvm/uvm_hal.h +++ b/kernel-open/nvidia-uvm/uvm_hal.h @@ -704,6 +704,10 @@ typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *pa typedef void 
(*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push); typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push, const uvm_access_counter_buffer_entry_t *buffer_entry); +typedef uvm_access_counter_clear_op_t + (*uvm_hal_access_counter_query_clear_op_t)(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters); void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters); @@ -720,6 +724,10 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push); void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push, const uvm_access_counter_buffer_entry_t *buffer_entry); +uvm_access_counter_clear_op_t +uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters); void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters); @@ -733,6 +741,18 @@ NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_g void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push); void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push, const uvm_access_counter_buffer_entry_t *buffer_entry); +uvm_access_counter_clear_op_t +uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); // The source and destination addresses must be 16-byte aligned. Note that the // best performance is achieved with 256-byte alignment. 
The decrypt size must @@ -786,6 +806,7 @@ struct uvm_host_hal_struct uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register; uvm_hal_access_counter_clear_all_t access_counter_clear_all; uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted; + uvm_hal_access_counter_query_clear_op_t access_counter_query_clear_op; uvm_hal_get_time_t get_time; }; diff --git a/kernel-open/nvidia-uvm/uvm_hal_types.h b/kernel-open/nvidia-uvm/uvm_hal_types.h index 0763e7115..f84289c14 100644 --- a/kernel-open/nvidia-uvm/uvm_hal_types.h +++ b/kernel-open/nvidia-uvm/uvm_hal_types.h @@ -471,6 +471,13 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2) return max(membar_1, membar_2); } +typedef enum +{ + UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0, + UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED, + UVM_ACCESS_COUNTER_CLEAR_OP_ALL +} uvm_access_counter_clear_op_t; + struct uvm_access_counter_buffer_entry_struct { // Address of the region for which a notification was sent diff --git a/kernel-open/nvidia-uvm/uvm_hmm.c b/kernel-open/nvidia-uvm/uvm_hmm.c index ff24a0127..b6bcb40a3 100644 --- a/kernel-open/nvidia-uvm/uvm_hmm.c +++ b/kernel-open/nvidia-uvm/uvm_hmm.c @@ -1992,7 +1992,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block, dpage = pfn_to_page(pfn); UVM_ASSERT(is_device_private_page(dpage)); - UVM_ASSERT(dpage->pgmap->owner == &g_uvm_global); + UVM_ASSERT(page_pgmap(dpage)->owner == &g_uvm_global); hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk); UVM_ASSERT(!page_count(dpage)); @@ -2438,6 +2438,39 @@ static void hmm_release_atomic_pages(uvm_va_block_t *va_block, } } +static int hmm_make_device_exclusive_range(struct mm_struct *mm, + unsigned long start, + unsigned long end, + struct page **pages) +{ +#if NV_IS_EXPORT_SYMBOL_PRESENT_make_device_exclusive + unsigned long addr; + int npages = 0; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + struct folio *folio; + struct page *page; + + page = make_device_exclusive(mm, addr, &g_uvm_global, &folio); + if (IS_ERR(page)) { + while (npages) { + page = pages[--npages]; + unlock_page(page); + put_page(page); + } + npages = PTR_ERR(page); + break; + } + + pages[npages++] = page; + } + + return npages; +#else + return make_device_exclusive_range(mm, start, end, pages, &g_uvm_global); +#endif +} + static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id, uvm_va_block_t *va_block, uvm_va_block_retry_t *va_block_retry, @@ -2491,11 +2524,10 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id, uvm_mutex_unlock(&va_block->lock); - npages = make_device_exclusive_range(service_context->block_context->mm, + npages = hmm_make_device_exclusive_range(service_context->block_context->mm, uvm_va_block_cpu_page_address(va_block, region.first), uvm_va_block_cpu_page_address(va_block, region.outer - 1) + PAGE_SIZE, - pages + region.first, - &g_uvm_global); + pages + region.first); uvm_mutex_lock(&va_block->lock); diff --git a/kernel-open/nvidia-uvm/uvm_linux.h b/kernel-open/nvidia-uvm/uvm_linux.h index 373345507..fc422de0f 100644 --- a/kernel-open/nvidia-uvm/uvm_linux.h +++ b/kernel-open/nvidia-uvm/uvm_linux.h @@ -427,4 +427,8 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot) #define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 1 #endif +#ifndef NV_PAGE_PGMAP_PRESENT +#define page_pgmap(page) (page)->pgmap +#endif + #endif // _UVM_LINUX_H diff --git a/kernel-open/nvidia-uvm/uvm_lock.c b/kernel-open/nvidia-uvm/uvm_lock.c index 1ee17044f..5ba3face9 100644 --- 
a/kernel-open/nvidia-uvm/uvm_lock.c +++ b/kernel-open/nvidia-uvm/uvm_lock.c @@ -27,7 +27,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order) { - BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37); + BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 38); switch (lock_order) { UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID); @@ -58,6 +58,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order) UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK); + UVM_ENUM_STRING_CASE(UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_WLC_CHANNEL); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST); @@ -352,15 +353,7 @@ bool __uvm_thread_check_all_unlocked(void) NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order) { - // TODO: Bug 1772140: Notably bit locks currently do not work on memory - // allocated through vmalloc() (including big allocations created with - // uvm_kvmalloc()). The problem is the bit_waitqueue() helper used by the - // kernel internally that uses virt_to_page(). - // To prevent us from using kmalloc() for a huge allocation, warn if the - // allocation size gets bigger than what we are comfortable with for - // kmalloc() in uvm_kvmalloc(). size_t size = sizeof(unsigned long) * BITS_TO_LONGS(count); - WARN_ON_ONCE(size > UVM_KMALLOC_THRESHOLD); bit_locks->bits = kzalloc(size, NV_UVM_GFP_FLAGS); if (!bit_locks->bits) diff --git a/kernel-open/nvidia-uvm/uvm_lock.h b/kernel-open/nvidia-uvm/uvm_lock.h index 2371e0f8c..6286f50e7 100644 --- a/kernel-open/nvidia-uvm/uvm_lock.h +++ b/kernel-open/nvidia-uvm/uvm_lock.h @@ -432,6 +432,11 @@ // Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK // Exclusive bitlock (mutex) per each root chunk internal to PMM. // +// - Access counters clear operations +// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS +// +// It protects the parent_gpu's access counters clear tracker. +// // - Channel lock // Order: UVM_LOCK_ORDER_CHANNEL // Spinlock (uvm_spinlock_t) or exclusive lock (mutex) @@ -477,7 +482,7 @@ // // CE semaphore payloads are encrypted, and require to take the CSL lock // (UVM_LOCK_ORDER_LEAF) to decrypt the payload. 
- +// // - CSL Context // Order: UVM_LOCK_ORDER_CSL_CTX // When the Confidential Computing feature is enabled, encrypt/decrypt @@ -523,6 +528,7 @@ typedef enum UVM_LOCK_ORDER_PMM, UVM_LOCK_ORDER_PMM_PMA, UVM_LOCK_ORDER_PMM_ROOT_CHUNK, + UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS, UVM_LOCK_ORDER_CHANNEL, UVM_LOCK_ORDER_WLC_CHANNEL, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST, diff --git a/kernel-open/nvidia-uvm/uvm_maxwell_host.c b/kernel-open/nvidia-uvm/uvm_maxwell_host.c index 8b580fd4b..6caee098b 100644 --- a/kernel-open/nvidia-uvm/uvm_maxwell_host.c +++ b/kernel-open/nvidia-uvm/uvm_maxwell_host.c @@ -336,6 +336,15 @@ void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push, UVM_ASSERT_MSG(false, "host access_counter_clear_targeted called on Maxwell GPU\n"); } +uvm_access_counter_clear_op_t +uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + UVM_ASSERT_MSG(false, "host access_counter_query_clear_op called on Maxwell GPU\n"); + return UVM_ACCESS_COUNTER_CLEAR_OP_NONE; +} + NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu) { NvU32 time0; diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index fb3dfdfc1..9da6bbc27 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -419,8 +419,9 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) // The passed-in subchunk is not the root chunk so the root chunk has to be // split. - UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n", - uvm_pmm_gpu_chunk_state_string(chunk->state)); + UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, + "chunk state %s\n", + uvm_pmm_gpu_chunk_state_string(chunk->state)); chunk->suballoc->pinned_leaf_chunks++; } @@ -448,8 +449,9 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_ // The passed-in subchunk is not the root chunk so the root chunk has to be // split. - UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n", - uvm_pmm_gpu_chunk_state_string(chunk->state)); + UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, + "chunk state %s\n", + uvm_pmm_gpu_chunk_state_string(chunk->state)); UVM_ASSERT(chunk->suballoc->pinned_leaf_chunks != 0); chunk->suballoc->pinned_leaf_chunks--; @@ -774,8 +776,10 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) UVM_ASSERT(chunk->suballoc->allocated == 0); } else { - UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), "%u != %u\n", - chunk->suballoc->allocated, num_subchunks(chunk)); + UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), + "%u != %u\n", + chunk->suballoc->allocated, + num_subchunks(chunk)); } return true; @@ -1263,11 +1267,13 @@ static NV_STATUS find_and_retain_va_block_to_evict(uvm_pmm_gpu_t *pmm, uvm_gpu_c uvm_spin_lock(&pmm->list_lock); - // All free chunks should have been pinned already by pin_free_chunks_func(). + // All free chunks should have been pinned already by + // pin_free_chunks_func(). 
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED || chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED || chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, - "state %s\n", uvm_pmm_gpu_chunk_state_string(chunk->state)); + "state %s\n", + uvm_pmm_gpu_chunk_state_string(chunk->state)); if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) { UVM_ASSERT(chunk->va_block); @@ -1754,8 +1760,10 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm, UVM_ASSERT(chunk->parent->suballoc); UVM_ASSERT(uvm_gpu_chunk_get_size(chunk->parent) == uvm_chunk_find_next_size(chunk_sizes, cur_size)); UVM_ASSERT(chunk->parent->type == type); - UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), "allocated %u num %u\n", - chunk->parent->suballoc->allocated, num_subchunks(chunk->parent)); + UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), + "allocated %u num %u\n", + chunk->parent->suballoc->allocated, + num_subchunks(chunk->parent)); } if (cur_size == chunk_size) { @@ -2373,8 +2381,8 @@ static void free_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) try_free = is_root; } else { - // Freeing a chunk can only fail if it requires merging. Take the PMM lock - // and free it with merges supported. + // Freeing a chunk can only fail if it requires merging. Take the PMM + // lock and free it with merges supported. uvm_mutex_lock(&pmm->lock); free_chunk_with_merges(pmm, chunk); uvm_mutex_unlock(&pmm->lock); @@ -3333,7 +3341,7 @@ void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu) // TODO: Bug 4672502: [Linux Upstream][UVM] Allow drivers to manage and // allocate PCI P2PDMA pages directly p2p_page = pfn_to_page(pci_start_pfn); - p2p_page->pgmap->ops = &uvm_device_p2p_pgmap_ops; + page_pgmap(p2p_page)->ops = &uvm_device_p2p_pgmap_ops; for (; page_to_pfn(p2p_page) < pci_end_pfn; p2p_page++) p2p_page->zone_device_data = NULL; @@ -3348,7 +3356,7 @@ void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu) if (gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(gpu->parent)) { p2p_page = pfn_to_page(pci_start_pfn); - devm_memunmap_pages(&gpu->parent->pci_dev->dev, p2p_page->pgmap); + devm_memunmap_pages(&gpu->parent->pci_dev->dev, page_pgmap(p2p_page)); } gpu->device_p2p_initialised = false; @@ -3437,6 +3445,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm) for (i = 0; i < UVM_PMM_GPU_MEMORY_TYPE_COUNT; i++) { pmm->chunk_sizes[i] = 0; + // Add the common root chunk size to all memory types pmm->chunk_sizes[i] |= UVM_CHUNK_SIZE_MAX; for (j = 0; j < ARRAY_SIZE(chunk_size_init); j++) @@ -3444,7 +3453,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm) UVM_ASSERT(pmm->chunk_sizes[i] < UVM_CHUNK_SIZE_INVALID); UVM_ASSERT_MSG(hweight_long(pmm->chunk_sizes[i]) <= UVM_MAX_CHUNK_SIZES, - "chunk sizes %lu, max chunk sizes %u\n", hweight_long(pmm->chunk_sizes[i]), UVM_MAX_CHUNK_SIZES); + "chunk sizes %lu, max chunk sizes %u\n", + hweight_long(pmm->chunk_sizes[i]), + UVM_MAX_CHUNK_SIZES); } status = init_caches(pmm); @@ -3452,9 +3463,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm) goto cleanup; // Assert that max physical address of the GPU is not unreasonably big for - // creating the flat array of root chunks. 256GB should provide a reasonable - // amount of future-proofing and results in 128K chunks which is still - // manageable. + // creating the flat array of root chunks. UVM_GPU_MAX_PHYS_MEM should + // provide a reasonable amount of future-proofing and results in 512K chunks + // which is still manageable. 
UVM_ASSERT_MSG(gpu->mem_info.max_allocatable_address < UVM_GPU_MAX_PHYS_MEM, "Max physical address 0x%llx exceeds limit of 0x%llx\n", gpu->mem_info.max_allocatable_address, diff --git a/kernel-open/nvidia-uvm/uvm_turing_host.c b/kernel-open/nvidia-uvm/uvm_turing_host.c index 0d31e3b89..2363d36b5 100644 --- a/kernel-open/nvidia-uvm/uvm_turing_host.c +++ b/kernel-open/nvidia-uvm/uvm_turing_host.c @@ -1,5 +1,5 @@ /******************************************************************************* - Copyright (c) 2017-2024 NVIDIA Corporation + Copyright (c) 2017-2025 NVIDIA Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to @@ -382,3 +382,11 @@ void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push, HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) | HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank)); } + +uvm_access_counter_clear_op_t +uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; +} diff --git a/kernel-open/nvidia/nv-acpi.c b/kernel-open/nvidia/nv-acpi.c index 48efa1ffd..94ab90f92 100644 --- a/kernel-open/nvidia/nv-acpi.c +++ b/kernel-open/nvidia/nv-acpi.c @@ -133,6 +133,7 @@ NV_STATUS NV_API_CALL nv_acpi_get_powersource(NvU32 *ac_plugged) return NV_OK; } +#define ACPI_POWER_SOURCE_BUS_CHANGE_EVENT 0x00 #define ACPI_POWER_SOURCE_CHANGE_EVENT 0x80 static void nv_acpi_powersource_hotplug_event(acpi_handle handle, u32 event_type, void *data) { @@ -143,7 +144,7 @@ static void nv_acpi_powersource_hotplug_event(acpi_handle handle, u32 event_type nv_acpi_t *pNvAcpiObject = data; u32 ac_plugged = 0; - if (event_type == ACPI_POWER_SOURCE_CHANGE_EVENT) + if (event_type == ACPI_POWER_SOURCE_CHANGE_EVENT || event_type == ACPI_POWER_SOURCE_BUS_CHANGE_EVENT) { if (nv_acpi_get_powersource(&ac_plugged) != NV_OK) return; @@ -201,8 +202,16 @@ static nv_acpi_t* nv_install_notifier( pNvAcpiObject->sp = sp; pNvAcpiObject->notifier_data = notifier_data; - status = acpi_install_notify_handler(handle, ACPI_DEVICE_NOTIFY, - handler, pNvAcpiObject); + if (handle == psr_device_handle) + { + status = acpi_install_notify_handler(handle, ACPI_ALL_NOTIFY, + handler, pNvAcpiObject); + } + else + { + status = acpi_install_notify_handler(handle, ACPI_DEVICE_NOTIFY, + handler, pNvAcpiObject); + } if (!ACPI_FAILURE(status)) { pNvAcpiObject->notify_handler_installed = 1; @@ -225,7 +234,14 @@ static void nv_uninstall_notifier(nv_acpi_t *pNvAcpiObject, acpi_notify_handler if (pNvAcpiObject && pNvAcpiObject->notify_handler_installed) { - status = acpi_remove_notify_handler(pNvAcpiObject->handle, ACPI_DEVICE_NOTIFY, handler); + if (pNvAcpiObject->handle == psr_device_handle) + { + status = acpi_remove_notify_handler(pNvAcpiObject->handle, ACPI_ALL_NOTIFY, handler); + } + else + { + status = acpi_remove_notify_handler(pNvAcpiObject->handle, ACPI_DEVICE_NOTIFY, handler); + } if (ACPI_FAILURE(status)) { nv_printf(NV_DBG_INFO, diff --git a/kernel-open/nvidia/nv-nano-timer.c b/kernel-open/nvidia/nv-nano-timer.c index 0ca78e1f3..47e9a8cbe 100644 --- a/kernel-open/nvidia/nv-nano-timer.c +++ b/kernel-open/nvidia/nv-nano-timer.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -150,8 +150,13 @@ void NV_API_CALL nv_create_nano_timer( nv_nstimer->nv_nano_timer_callback = nvidia_nano_timer_callback; #if NV_NANO_TIMER_USE_HRTIMER +#if NV_IS_EXPORT_SYMBOL_PRESENT_hrtimer_setup + hrtimer_setup(&nv_nstimer->hr_timer, &nv_nano_timer_callback_typed_data, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); +#else hrtimer_init(&nv_nstimer->hr_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); nv_nstimer->hr_timer.function = nv_nano_timer_callback_typed_data; +#endif // NV_IS_EXPORT_SYMBOL_PRESENT_hrtimer_setup #else #if defined(NV_TIMER_SETUP_PRESENT) timer_setup(&nv_nstimer->jiffy_timer, nv_jiffy_timer_callback_typed_data, 0); @@ -203,7 +208,7 @@ void NV_API_CALL nv_cancel_nano_timer( #if NV_NANO_TIMER_USE_HRTIMER hrtimer_cancel(&nv_nstimer->hr_timer); #else - del_timer_sync(&nv_nstimer->jiffy_timer); + nv_timer_delete_sync(&nv_nstimer->jiffy_timer); #endif } diff --git a/kernel-open/nvidia/nv-pci.c b/kernel-open/nvidia/nv-pci.c index 42c5dc6a2..6210ba134 100644 --- a/kernel-open/nvidia/nv-pci.c +++ b/kernel-open/nvidia/nv-pci.c @@ -614,19 +614,6 @@ nv_pci_probe } #endif /* NV_PCI_SRIOV_SUPPORT */ - if (!rm_wait_for_bar_firewall( - sp, - NV_PCI_DOMAIN_NUMBER(pci_dev), - NV_PCI_BUS_NUMBER(pci_dev), - NV_PCI_SLOT_NUMBER(pci_dev), - PCI_FUNC(pci_dev->devfn), - pci_dev->device)) - { - nv_printf(NV_DBG_ERRORS, - "NVRM: failed to wait for bar firewall to lower\n"); - goto failed; - } - if (!rm_is_supported_pci_device( (pci_dev->class >> 16) & 0xFF, (pci_dev->class >> 8) & 0xFF, diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index a3a36d3cc..cf5a3258a 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -4189,7 +4189,7 @@ int NV_API_CALL nv_stop_rc_timer( nv_printf(NV_DBG_INFO, "NVRM: stopping rc timer\n"); nv->rc_timer_enabled = 0; - del_timer_sync(&nvl->rc_timer.kernel_timer); + nv_timer_delete_sync(&nvl->rc_timer.kernel_timer); nv_printf(NV_DBG_INFO, "NVRM: rc timer stopped\n"); return 0; @@ -4233,7 +4233,7 @@ void NV_API_CALL nv_stop_snapshot_timer(void) NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags); if (timer_active) - del_timer_sync(&nvl->snapshot_timer.kernel_timer); + nv_timer_delete_sync(&nvl->snapshot_timer.kernel_timer); } void NV_API_CALL nv_flush_snapshot_timer(void) diff --git a/kernel-open/nvidia/nvidia.Kbuild b/kernel-open/nvidia/nvidia.Kbuild index 29f4adea0..ccc047e4d 100644 --- a/kernel-open/nvidia/nvidia.Kbuild +++ b/kernel-open/nvidia/nvidia.Kbuild @@ -238,6 +238,8 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_set_memory_encrypted NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_set_memory_decrypted NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl___platform_driver_register NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___platform_driver_register +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_hrtimer_setup +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops NV_CONFTEST_TYPE_COMPILE_TESTS += swiotlb_dma_ops @@ -266,6 +268,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += of_property_for_each_u32_has_internal_args NV_CONFTEST_TYPE_COMPILE_TESTS += platform_driver_struct_remove_returns_void NV_CONFTEST_TYPE_COMPILE_TESTS += class_create_has_no_owner_arg NV_CONFTEST_TYPE_COMPILE_TESTS += 
class_devnode_has_const_arg +NV_CONFTEST_TYPE_COMPILE_TESTS += has_enum_pidtype_tgid NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build diff --git a/kernel-open/nvidia/os-interface.c b/kernel-open/nvidia/os-interface.c index 9657c51d9..ef28c90ca 100644 --- a/kernel-open/nvidia/os-interface.c +++ b/kernel-open/nvidia/os-interface.c @@ -2672,7 +2672,11 @@ NV_STATUS NV_API_CALL os_offline_page_at_address void* NV_API_CALL os_get_pid_info(void) { - return get_task_pid(current, PIDTYPE_PID); +#if defined(NV_HAS_ENUM_PIDTYPE_TGID) + return get_task_pid(current, PIDTYPE_TGID); +#else + return get_task_pid(current->group_leader, PIDTYPE_PID); +#endif } void NV_API_CALL os_put_pid_info(void *pid_info) diff --git a/src/common/displayport/inc/dp_connector.h b/src/common/displayport/inc/dp_connector.h index f9220cad2..939af2682 100644 --- a/src/common/displayport/inc/dp_connector.h +++ b/src/common/displayport/inc/dp_connector.h @@ -333,6 +333,9 @@ namespace DisplayPort virtual bool getDeviceSpecificData(NvU8 *oui, NvU8 *deviceIdString, NvU8 *hwRevision, NvU8 *swMajorRevision, NvU8 *swMinorRevision) = 0; + virtual bool getParentSpecificData(NvU8 *oui, NvU8 *deviceIdString, + NvU8 *hwRevision, NvU8 *swMajorRevision, + NvU8 *swMinorRevision) = 0; virtual bool setModeList(DisplayPort::DpModesetParams *pModeList, unsigned numModes) = 0; diff --git a/src/common/displayport/inc/dp_deviceimpl.h b/src/common/displayport/inc/dp_deviceimpl.h index 8cc9ac24f..0fad3ed7a 100644 --- a/src/common/displayport/inc/dp_deviceimpl.h +++ b/src/common/displayport/inc/dp_deviceimpl.h @@ -508,6 +508,9 @@ namespace DisplayPort virtual bool getDeviceSpecificData(NvU8 *oui, NvU8 *deviceIdString, NvU8 *hwRevision, NvU8 *swMajorRevision, NvU8 *swMinorRevision); + virtual bool getParentSpecificData(NvU8 *oui, NvU8 *deviceIdString, + NvU8 *hwRevision, NvU8 *swMajorRevision, + NvU8 *swMinorRevision); virtual bool setModeList(DisplayPort::DpModesetParams *pModeList, unsigned numModes); }; diff --git a/src/common/displayport/src/dp_connectorimpl2x.cpp b/src/common/displayport/src/dp_connectorimpl2x.cpp index 77db61343..c984086f1 100644 --- a/src/common/displayport/src/dp_connectorimpl2x.cpp +++ b/src/common/displayport/src/dp_connectorimpl2x.cpp @@ -561,12 +561,22 @@ bool ConnectorImpl2x::compoundQueryAttachMSTGeneric(Group * target, tail->bandwidth.compound_query_state.totalTimeSlots) { compoundQueryResult = false; + if(this->bEnableLowerBppCheckForDsc) + { + tail->bandwidth.compound_query_state.timeslots_used_by_query -= linkConfig->slotsForPBN(base_pbn); + tail->bandwidth.compound_query_state.bandwidthAllocatedForIndex &= ~(1 << compoundQueryCount); + } SET_DP_IMP_ERROR(pErrorCode, DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH) } } tail = (DeviceImpl*)tail->getParent(); } } + // If the compoundQueryResult is false, we need to reset the compoundQueryLocalLinkPBN + if (!compoundQueryResult && this->bEnableLowerBppCheckForDsc) + { + compoundQueryLocalLinkPBN -= slots_pbn; + } } else { @@ -1261,8 +1271,8 @@ void ConnectorImpl2x::notifyDetachBegin(Group *target) Device *newDev = target->enumDevices(0); DeviceImpl *dev = (DeviceImpl *)newDev; - GroupImpl *group = (GroupImpl*)target; - + GroupImpl *group = (GroupImpl*)target; + if (dev != NULL && dev->bApplyPclkWarBug4949066 == true) { EvoInterface *provider = ((EvoMainLink *)main)->getProvider(); @@ -1840,11 +1850,14 @@ void ConnectorImpl2x::handleEdidWARs(Edid & edid, DiscoveryManager::Device & dev if 
(edid.WARFlags.bDisableDscMaxBppLimit) { bDisableDscMaxBppLimit = true; - } + } if (edid.WARFlags.bForceHeadShutdownOnModeTransition) { bForceHeadShutdownOnModeTransition = true; } + if (edid.WARFlags.bDisableDownspread) + { + setDisableDownspread(true); + } } - diff --git a/src/common/displayport/src/dp_deviceimpl.cpp b/src/common/displayport/src/dp_deviceimpl.cpp index 484f8ddc5..3a7daab6b 100644 --- a/src/common/displayport/src/dp_deviceimpl.cpp +++ b/src/common/displayport/src/dp_deviceimpl.cpp @@ -3232,6 +3232,19 @@ bool DeviceImpl::getDeviceSpecificData(NvU8 *oui, NvU8 *devIdString, return true; } +bool DeviceImpl::getParentSpecificData(NvU8 *oui, NvU8 *devIdString, + NvU8 *hwRevision, NvU8 *swMajorRevision, + NvU8 *swMinorRevision) +{ + if (this->parent == NULL) + { + return false; + } + + return this->parent->getDeviceSpecificData(oui, devIdString, hwRevision, + swMajorRevision, swMinorRevision); +} + bool DeviceImpl::setModeList(DisplayPort::DpModesetParams *modeList, unsigned numModes) { // Create a dummy group for compoundQuery diff --git a/src/common/displayport/src/dp_wardatabase.cpp b/src/common/displayport/src/dp_wardatabase.cpp index ac7c92f9e..99b9f15c9 100644 --- a/src/common/displayport/src/dp_wardatabase.cpp +++ b/src/common/displayport/src/dp_wardatabase.cpp @@ -600,18 +600,18 @@ void Edid::applyEdidWorkArounds(NvU32 warFlag, const DpMonitorDenylistData *pDen { this->WARFlags.bDisableDscMaxBppLimit = true; DP_PRINTF(DP_NOTICE, "DP-WAR> Disable DSC max BPP limit of 16 for DSC."); - } - else if (ProductID == 0x5CA7) + } + else if (ProductID == 0x5CA7 || ProductID == 0x9E9D || ProductID == 0x9EA0 || ProductID == 0x9EA5 || ProductID == 0x5CB7 || + ProductID == 0x9EA8 || ProductID == 0x9EAF || ProductID == 0x7846 || ProductID == 0x7849 || ProductID == 0x5CB5) { this->WARFlags.bForceHeadShutdownOnModeTransition = true; DP_PRINTF(DP_NOTICE, "DP-WAR> Force head shutdown on Mode transition."); } break; - // Gigabyte - case 0x541C: - if (ProductID == 0x3215) + case 0xB306: + if (ProductID == 0x3228) { - // Gigabyte AORUS FO32U2P does not set DPCD 0x2217 to reflect correct CableID. + // ASUS PG32UQXR does not set DPCD 0x2217 to reflect correct CableID. this->WARFlags.bSkipCableIdCheck = true; DP_PRINTF(DP_NOTICE, "DP-WAR> Panel does not expose cable capability. Ignoring it. Bug 4968411"); } @@ -643,7 +643,13 @@ void Edid::applyEdidWorkArounds(NvU32 warFlag, const DpMonitorDenylistData *pDen this->WARFlags.bDisableDownspread = true; } break; - + case 0x545A: // VRT Varjo XR3 + if (ProductID == 0x5841 || ProductID == 0x5842 || ProductID == 0x5843) + { + this->WARFlags.bDisableDownspread = true; + DP_PRINTF(DP_NOTICE, "DP-WAR> VRT monitor does not work with GB20x when downspread is enabled. Disabling downspread."); + } + break; default: break; } diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 03791690d..99b7ce51a 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. 
You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r575_92 + #define NV_BUILD_BRANCH r576_41 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r575_92 + #define NV_PUBLIC_BRANCH r576_41 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r575/r575_92-148" -#define NV_BUILD_CHANGELIST_NUM (35843743) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r575/r576_41-183" +#define NV_BUILD_CHANGELIST_NUM (36029171) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r575/r575_92-148" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35843743) +#define NV_BUILD_NAME "rel/gpu_drv/r575/r576_41-183" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36029171) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r575_92-7" -#define NV_BUILD_CHANGELIST_NUM (35824637) +#define NV_BUILD_BRANCH_VERSION "r576_41-8" +#define NV_BUILD_CHANGELIST_NUM (36020778) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "576.02" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35824637) +#define NV_BUILD_NAME "576.60" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36020778) #define NV_BUILD_BRANCH_BASE_VERSION R575 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 57ee7d026..61809a8b8 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "575.51.03" +#define NV_VERSION_STRING "575.57.08" #else diff --git a/src/common/nvswitch/kernel/ls10/minion_ls10.c b/src/common/nvswitch/kernel/ls10/minion_ls10.c index 69cfe07f6..e40370682 100644 --- a/src/common/nvswitch/kernel/ls10/minion_ls10.c +++ b/src/common/nvswitch/kernel/ls10/minion_ls10.c @@ -941,7 +941,6 @@ cleanup: if (tempStatus != NVL_SUCCESS) { NVSWITCH_PRINT(device, ERROR, "Link %d Inband Buffer transfer for RX_BUFFER_CLEAR\n", linkId); - return; } if (device->link[linkId].inbandData.message != NULL) { diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h b/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h index 5fb371d58..cbc5ed66e 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000system.h @@ -2139,6 +2139,16 @@ typedef struct NV0000_CTRL_CMD_SYSTEM_GET_SYSTEM_POWER_LIMIT { NvU32 shortTimescaleBatteryCurrentLimitmA; } NV0000_CTRL_CMD_SYSTEM_GET_SYSTEM_POWER_LIMIT; +/*! + * States for the Battery CPU TDP Control ability. 
+ * _CPU_TDP_CONTROL_TYPE_DC_ONLY :==> Legacy setting for DC only CPU TDP Control + * _CPU_TDP_CONTROL_TYPE_DC_AC :==> AC and DC both support CPU TDP Control + */ +typedef enum QBOOST_CPU_TDP_CONTROL_TYPE { + QBOOST_CPU_TDP_CONTROL_TYPE_DC_ONLY = 0, + QBOOST_CPU_TDP_CONTROL_TYPE_DC_AC = 1, +} QBOOST_CPU_TDP_CONTROL_TYPE; + #define NV0000_CTRL_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS_MESSAGE_ID (0x3BU) typedef struct NV0000_CTRL_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS { @@ -2277,6 +2287,9 @@ typedef struct NV0000_CTRL_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS { /* CPU TDP Limit to be set (milliwatts) */ NvU32 cpuTdpmw; + + /* CPU TDP Control Support */ + QBOOST_CPU_TDP_CONTROL_TYPE cpuTdpControlType; } NV0000_CTRL_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS; /* Define the filter types */ diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080bus.h b/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080bus.h index d6b05ff37..00d93849a 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080bus.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080bus.h @@ -1694,3 +1694,5 @@ typedef struct NV2080_CTRL_CMD_BUS_SET_C2C_LPWR_STATE_VOTE_PARAMS { NvU32 c2cLpwrStateId; NvBool bAllowed; } NV2080_CTRL_CMD_BUS_SET_C2C_LPWR_STATE_VOTE_PARAMS; + + diff --git a/src/common/sdk/nvidia/inc/nvos.h b/src/common/sdk/nvidia/inc/nvos.h index c84de7282..69415b27d 100644 --- a/src/common/sdk/nvidia/inc/nvos.h +++ b/src/common/sdk/nvidia/inc/nvos.h @@ -79,6 +79,7 @@ extern "C" { #define NVOS_STATUS_ERROR_ILLEGAL_ACTION NV_ERR_ILLEGAL_ACTION #define NVOS_STATUS_ERROR_IN_USE NV_ERR_STATE_IN_USE #define NVOS_STATUS_ERROR_INSUFFICIENT_RESOURCES NV_ERR_INSUFFICIENT_RESOURCES +#define NVOS_STATUS_ERROR_INSUFFICIENT_ZBC_ENTRY NV_ERR_INSUFFICIENT_ZBC_ENTRY #define NVOS_STATUS_ERROR_INVALID_ACCESS_TYPE NV_ERR_INVALID_ACCESS_TYPE #define NVOS_STATUS_ERROR_INVALID_ARGUMENT NV_ERR_INVALID_ARGUMENT #define NVOS_STATUS_ERROR_INVALID_BASE NV_ERR_INVALID_BASE diff --git a/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c b/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c index 031f59d41..f92a35796 100644 --- a/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c +++ b/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c @@ -569,7 +569,7 @@ static int libos_printf_a( // Prefix every line with T:nnnn GPUn Ucode-task: filename(lineNumber): len = snprintf( logDecode->curLineBufPtr, remain, - "T:%llu ", pRec->timeStamp); + "T:%llu ", pRec->timeStamp + pRec->log->localToGlobalTimerDelta); if (len < 0) { return -1; @@ -590,7 +590,7 @@ static int libos_printf_a( { struct tm tmStruct; // Libos timestamp is a PTIMER value, which is UNIX time in ns - time_t timeSec = pRec->timeStamp / 1000000000; + time_t timeSec = (pRec->timeStamp + pRec->log->localToGlobalTimerDelta) / 1000000000; #if NVOS_IS_WINDOWS // "The implementation of localtime_s in Microsoft CRT is incompatible with the // C standard since it has reversed parameter order and returns errno_t." @@ -1324,7 +1324,6 @@ static void libosExtractLogs_decode(LIBOS_LOG_DECODE *logDecode) recSize * sizeof(NvU64)); pPrevRec = (LIBOS_LOG_DECODE_RECORD *)&logDecode->scratchBuffer[dst]; - pPrevRec->timeStamp += pLog->localToGlobalTimerDelta; } // Read in the next record from the log we just copied. 
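The liblogdecode.c changes above move the local-to-global timer correction out of the record-merging pass (the deleted `pPrevRec->timeStamp += pLog->localToGlobalTimerDelta;`) and into the two print sites, so buffered records now keep their raw local timestamps and the delta cannot be applied twice. A minimal sketch of the resulting print-time conversion, assuming only the `timeStamp` field and the per-log delta from the source; every other name here is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef struct
{
    uint64_t timeStamp; /* raw local PTIMER value: UNIX time in nanoseconds */
} LogRecord;

/* Apply the per-log delta only when formatting, leaving the record intact. */
static void printRecordTime(const LogRecord *pRec, int64_t localToGlobalTimerDelta)
{
    uint64_t  globalNs = pRec->timeStamp + (uint64_t)localToGlobalTimerDelta;
    time_t    timeSec  = (time_t)(globalNs / 1000000000ULL);
    struct tm tmStruct;

    /* The decoder uses localtime; UTC keeps this sketch portable. */
    gmtime_r(&timeSec, &tmStruct);
    printf("T:%llu (%04d-%02d-%02d %02d:%02d:%02d UTC)\n",
           (unsigned long long)globalNs,
           tmStruct.tm_year + 1900, tmStruct.tm_mon + 1, tmStruct.tm_mday,
           tmStruct.tm_hour, tmStruct.tm_min, tmStruct.tm_sec);
}

int main(void)
{
    LogRecord rec = { .timeStamp = 1750000000000000000ULL }; /* arbitrary ns value */
    printRecordTime(&rec, 123456789); /* delta as read from the log metadata */
    return 0;
}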
diff --git a/src/nvidia/arch/nvalloc/unix/include/nv.h b/src/nvidia/arch/nvalloc/unix/include/nv.h index bdbefe9a3..ba9907d27 100644 --- a/src/nvidia/arch/nvalloc/unix/include/nv.h +++ b/src/nvidia/arch/nvalloc/unix/include/nv.h @@ -961,7 +961,6 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t * void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *); void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *); NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *); -NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId); NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t); NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *); NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *); diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c index 49ea01656..a6494dfa8 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osapi.c +++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c @@ -5979,24 +5979,3 @@ void NV_API_CALL rm_notify_gpu_removal( rm_notify_gpu_addition_removal_helper(nv, NV_FALSE); NV_EXIT_RM_RUNTIME(sp,fp); } - -NvBool NV_API_CALL rm_wait_for_bar_firewall( - nvidia_stack_t * sp, - NvU32 domain, - NvU8 bus, - NvU8 device, - NvU8 function, - NvU16 devId -) -{ - NvBool ret; - void *fp = NULL; - - // no state set up yet for threadstate or RM locks - NV_ENTER_RM_RUNTIME(sp,fp); - ret = gpumgrWaitForBarFirewall(domain, bus, device, function, devId); - NV_EXIT_RM_RUNTIME(sp,fp); - - return ret; -} - diff --git a/src/nvidia/exports_link_command.txt b/src/nvidia/exports_link_command.txt index cb1c510fe..4c6fc8ce7 100644 --- a/src/nvidia/exports_link_command.txt +++ b/src/nvidia/exports_link_command.txt @@ -23,7 +23,6 @@ --undefined=rm_isr_bh --undefined=rm_isr_bh_unlocked --undefined=rm_is_msix_allowed ---undefined=rm_wait_for_bar_firewall --undefined=rm_perform_version_check --undefined=rm_power_management --undefined=rm_stop_user_channels diff --git a/src/nvidia/generated/g_gpu_access_nvoc.h b/src/nvidia/generated/g_gpu_access_nvoc.h index 8ce054298..3b9b20b2d 100644 --- a/src/nvidia/generated/g_gpu_access_nvoc.h +++ b/src/nvidia/generated/g_gpu_access_nvoc.h @@ -231,6 +231,13 @@ void regCheckAndLogReadFailure(RegisterAccess *, NvU32 addr, NvU32 mask, NvU32 v // Get the address of a register given the Aperture and offset. 
#define REG_GET_ADDR(ap, offset) ioaprtGetRegAddr(ap, offset) +// +// These UNCHECKED macros are provided for extenuating circumstances to avoid the 0xbadf +// sanity checking done by the usual register read utilities and must not be used generally +// +#define GPU_REG_RD08_UNCHECKED(g,a) osDevReadReg008(g, gpuGetDeviceMapping(g, DEVICE_INDEX_GPU, 0), a) +#define GPU_REG_RD32_UNCHECKED(g,a) osDevReadReg032(g, gpuGetDeviceMapping(g, DEVICE_INDEX_GPU, 0), a) + // GPU macros defined in terms of DEV_ macros #define GPU_REG_RD08(g,a) REG_INST_RD08(g,GPU,0,a) #define GPU_REG_RD16(g,a) REG_INST_RD16(g,GPU,0,a) diff --git a/src/nvidia/generated/g_gpu_nvoc.c b/src/nvidia/generated/g_gpu_nvoc.c index 711c833fd..2ddc44a5f 100644 --- a/src/nvidia/generated/g_gpu_nvoc.c +++ b/src/nvidia/generated/g_gpu_nvoc.c @@ -438,6 +438,18 @@ void __nvoc_init_dataField_OBJGPU(OBJGPU *pThis) { pThis->setProperty(pThis, PDB_PROP_GPU_FASTPATH_SEQ_ENABLED, NV_FALSE); pThis->setProperty(pThis, PDB_PROP_GPU_RECOVERY_DRAIN_P2P_REQUIRED, NV_FALSE); + // NVOC Property Hal field -- PDB_PROP_GPU_REUSE_INIT_CONTING_MEM + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xe0000000UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e6UL) )) /* ChipHal: GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B */ + { + pThis->setProperty(pThis, PDB_PROP_GPU_REUSE_INIT_CONTING_MEM, NV_TRUE); + } + // default + else + { + pThis->setProperty(pThis, PDB_PROP_GPU_REUSE_INIT_CONTING_MEM, NV_FALSE); + } + // NVOC Property Hal field -- PDB_PROP_GPU_RUSD_POLLING_SUPPORT_MONOLITHIC if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x71f0f800UL) ) || ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e6UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 */ diff --git a/src/nvidia/generated/g_gpu_nvoc.h b/src/nvidia/generated/g_gpu_nvoc.h index b2d4eb099..49d8163d0 100644 --- a/src/nvidia/generated/g_gpu_nvoc.h +++ b/src/nvidia/generated/g_gpu_nvoc.h @@ -1123,7 +1123,7 @@ struct OBJGPU { NvBool (*__gpuRequireGrCePresence__)(struct OBJGPU * /*this*/, ENGDESCRIPTOR); // halified (3 hals) body NvBool (*__gpuGetIsCmpSku__)(struct OBJGPU * /*this*/); // halified (2 hals) body - // 117 PDB properties + // 118 PDB properties NvBool PDB_PROP_GPU_HIGH_SPEED_BRIDGE_CONNECTED; NvBool PDB_PROP_GPU_IN_STANDBY; NvBool PDB_PROP_GPU_IN_HIBERNATE; @@ -1239,6 +1239,7 @@ struct OBJGPU { NvBool PDB_PROP_GPU_FASTPATH_SEQ_ENABLED; NvBool PDB_PROP_GPU_PREPARING_FULLCHIP_RESET; NvBool PDB_PROP_GPU_RECOVERY_DRAIN_P2P_REQUIRED; + NvBool PDB_PROP_GPU_REUSE_INIT_CONTING_MEM; NvBool PDB_PROP_GPU_RUSD_POLLING_SUPPORT_MONOLITHIC; NvBool PDB_PROP_GPU_RECOVERY_REBOOT_REQUIRED; @@ -1489,6 +1490,8 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_OBJGPU; #define PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU_BASE_NAME PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU #define PDB_PROP_GPU_SKIP_TABLE_CE_MAP_BASE_CAST #define PDB_PROP_GPU_SKIP_TABLE_CE_MAP_BASE_NAME PDB_PROP_GPU_SKIP_TABLE_CE_MAP +#define PDB_PROP_GPU_REUSE_INIT_CONTING_MEM_BASE_CAST +#define PDB_PROP_GPU_REUSE_INIT_CONTING_MEM_BASE_NAME PDB_PROP_GPU_REUSE_INIT_CONTING_MEM #define PDB_PROP_GPU_IN_FATAL_ERROR_BASE_CAST #define PDB_PROP_GPU_IN_FATAL_ERROR_BASE_NAME PDB_PROP_GPU_IN_FATAL_ERROR #define 
PDB_PROP_GPU_VGA_ENABLED_BASE_CAST diff --git a/src/nvidia/generated/g_gpu_vaspace_nvoc.h b/src/nvidia/generated/g_gpu_vaspace_nvoc.h index 228fca65f..ec51fafa4 100644 --- a/src/nvidia/generated/g_gpu_vaspace_nvoc.h +++ b/src/nvidia/generated/g_gpu_vaspace_nvoc.h @@ -182,6 +182,10 @@ typedef struct GVAS_GPU_STATE * List head of 4K page cache used for suballocating BPTs */ MEMORY_DESCRIPTOR_LIST unpackedMemDescList; + /*! + * Memory pool for client page tables + */ + RM_POOL_ALLOC_MEM_RESERVE_INFO *pPageTableMemPool; /*! * Reserved page table entries for the GVA space. */ @@ -277,7 +281,6 @@ struct OBJGVASPACE { NvBool bRMInternalRestrictedVaRange; NvU64 vaStartServerRMOwned; NvU64 vaLimitServerRMOwned; - RM_POOL_ALLOC_MEM_RESERVE_INFO *pPageTableMemPool; }; diff --git a/src/nvidia/generated/g_kern_disp_nvoc.h b/src/nvidia/generated/g_kern_disp_nvoc.h index c131899ef..03daba1da 100644 --- a/src/nvidia/generated/g_kern_disp_nvoc.h +++ b/src/nvidia/generated/g_kern_disp_nvoc.h @@ -232,11 +232,12 @@ struct KernelDisplay { NV_STATUS (*__kdispGetChnStatusRegs__)(struct KernelDisplay * /*this*/, DISPCHNCLASS, NvU32, NvU32 *); // halified (2 hals) body void (*__kdispApplyWarForBug3385499__)(OBJGPU *, struct KernelDisplay * /*this*/, DISPCHNCLASS, NvU32, NvU32); // halified (2 hals) body - // 5 PDB properties + // 6 PDB properties NvBool PDB_PROP_KDISP_IMP_ALLOC_BW_IN_KERNEL_RM_DEF; NvBool PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE; NvBool PDB_PROP_KDISP_IN_AWAKEN_INTR; NvBool PDB_PROP_KDISP_HAS_SEPARATE_LOW_LATENCY_LINE; + NvBool PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED; // Data members struct DisplayInstanceMemory *pInst; @@ -326,6 +327,8 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_KernelDisplay; #define PDB_PROP_KDISP_IMP_ALLOC_BW_IN_KERNEL_RM_DEF_BASE_NAME PDB_PROP_KDISP_IMP_ALLOC_BW_IN_KERNEL_RM_DEF #define PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE_BASE_CAST #define PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE_BASE_NAME PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE +#define PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED_BASE_CAST +#define PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED_BASE_NAME PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED NV_STATUS __nvoc_objCreateDynamic_KernelDisplay(KernelDisplay**, Dynamic*, NvU32, va_list); diff --git a/src/nvidia/generated/g_nv_name_released.h b/src/nvidia/generated/g_nv_name_released.h index 4ff0a96f7..b0325df66 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ b/src/nvidia/generated/g_nv_name_released.h @@ -5420,16 +5420,27 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2BB1, 0x204b, 0x103c, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 0x17aa, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, + { 0x2BB3, 0x204d, 0x1028, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x204d, 0x103c, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x204d, 0x10de, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x204d, 0x17aa, "NVIDIA RTX PRO 5000 Blackwell" }, { 0x2BB4, 0x204c, 0x1028, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x103c, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x17aa, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, + { 0x2BB5, 0x204e, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Server Edition" }, { 0x2C02, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080" }, 
     { 0x2C05, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti" },
     { 0x2C18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 Laptop GPU" },
     { 0x2C19, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080 Laptop GPU" },
     { 0x2C58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 Laptop GPU" },
     { 0x2C59, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080 Laptop GPU" },
+    { 0x2D04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Ti" },
+    { 0x2D05, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060" },
+    { 0x2D18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Laptop GPU" },
+    { 0x2D19, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Laptop GPU" },
+    { 0x2D58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Laptop GPU" },
+    { 0x2D59, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Laptop GPU" },
     { 0x2F04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070" },
     { 0x2F18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" },
     { 0x2F58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" },
diff --git a/src/nvidia/inc/kernel/platform/nvpcf.h b/src/nvidia/inc/kernel/platform/nvpcf.h
index 4f7d10973..02a9a9f50 100644
--- a/src/nvidia/inc/kernel/platform/nvpcf.h
+++ b/src/nvidia/inc/kernel/platform/nvpcf.h
@@ -83,6 +83,7 @@ typedef struct
 #define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22 (0x22)
 #define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23 (0x23)
 #define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24 (0x24)
+#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25 (0x25)
 #define NVPCF_CONTROLLER_STATIC_TABLE_MAX_ENTRIES (8)
 
 // format for 2.0 and 2.1
@@ -184,8 +185,9 @@ typedef struct
 #define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V20_FILTER_RESERVED 31:16
 
 // Param0
-#define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V20_PARAM0_QBOOST_INCREASE_GAIN 15:0
+#define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V20_PARAM0_QBOOST_INCREASE_GAIN 15:0
 #define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V20_PARAM0_QBOOST_DECREASE_GAIN 31:16
+#define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V25_PARAM0_CPU_TDP_TYPE 4:1
 
 // Param1
 #define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V20_PARAM1_QBOOST_DC_SUPPORT 0:0
@@ -196,6 +198,10 @@ typedef struct
 #define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V22_SIZE_05 (0x05U)
 #define NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V22_FMT_SIZE_05 ("1b1d")
 
+// Param0
+#define NVPCF_CONTROLLER_SBIOS_TABLE_CPU_TDP_CONTROL_DC_ONLY (0x00)
+#define NVPCF_CONTROLLER_SBIOS_TABLE_CPU_TDP_CONTROL_DC_AC (0x01)
+
 /*!
  * Static system controller header table v2.2, unpacked
  */
diff --git a/src/nvidia/interface/nvrm_registry.h b/src/nvidia/interface/nvrm_registry.h
index bbf86d127..dc4b689ab 100644
--- a/src/nvidia/interface/nvrm_registry.h
+++ b/src/nvidia/interface/nvrm_registry.h
@@ -1345,6 +1345,11 @@
 #define NV_REG_STR_RM_INTR_LOCKING_MODE_DEFAULT (0x00000000)
 #define NV_REG_STR_RM_INTR_LOCKING_MODE_INTR_MASK (0x00000001)
 
+#define NV_REG_INTERNAL_PANEL_DISCONNECTED "RMInternalPanelDisconnected"
+#define NV_REG_INTERNAL_PANEL_DISCONNECTED_DISABLE 0x00000000
+#define NV_REG_INTERNAL_PANEL_DISCONNECTED_ENABLE 0x00000001
+#define NV_REG_INTERNAL_PANEL_DISCONNECTED_DEFAULT NV_REG_INTERNAL_PANEL_DISCONNECTED_DISABLE
+
 #define NV_REG_STR_RM_PER_INTR_DPC_QUEUING "RMDisablePerIntrDPCQueueing"
 // Type DWORD
 // This regkey is used to disable per interrupt DPC queuing.
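The `RMInternalPanelDisconnected` key added above is consumed later in this change by `kdispConstructEngine_IMPL` (see the kern_disp.c hunk below). The read-and-latch pattern looks roughly like the following sketch; the helper function is hypothetical, while `osReadRegistryDword`, `setProperty`, and the regkey/PDB names come from this change:

#include "gpu/disp/kern_disp.h"
#include "nvrm_registry.h"

/* Hypothetical helper: latch the regkey into the PDB property once at engine construction. */
static void sketchApplyPanelDisconnectedRegkey(OBJGPU *pGpu, KernelDisplay *pKernelDisplay)
{
    NvU32  data;
    NvBool bDisconnected = NV_FALSE; /* matches NV_REG_INTERNAL_PANEL_DISCONNECTED_DEFAULT */

    if ((osReadRegistryDword(pGpu, NV_REG_INTERNAL_PANEL_DISCONNECTED, &data) == NV_OK) &&
        (data == NV_REG_INTERNAL_PANEL_DISCONNECTED_ENABLE))
    {
        bDisconnected = NV_TRUE;
    }

    pKernelDisplay->setProperty(pKernelDisplay,
                                PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED,
                                bDisconnected);
}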
@@ -2667,6 +2672,19 @@
 #define NV_REG_STR_RM_RUSD_POLLING_INTERVAL_MIN 100
 #define NV_REG_STR_RM_RUSD_POLLING_INTERVAL_MAX 1000
 
+//
+// Type: DWORD
+// This regkey enables a path that reuses the initially allocated contiguous
+// memory instead of freeing it dynamically, which causes fragmentation; in
+// low-memory configurations the reallocation could fail and lead to a BSOD.
+// Enable the key to reuse the initial allocation without a free/reallocate cycle
+// - Default for Blackwell and up
+// Disable the key to free and allocate dynamically
+//
+#define NV_REG_STR_RM_INIT_MEM_REUSE "RmInitMemReuse"
+#define NV_REG_STR_RM_INIT_MEM_REUSE_ENABLE 0x00000001
+#define NV_REG_STR_RM_INIT_MEM_REUSE_DISABLE 0x00000000
+
 //
 // Type DWORD (Boolean)
 // This regkey controls the use of BAR1 SPA instead of GPA for p2p subsystems
diff --git a/src/nvidia/src/kernel/compute/imex_session_api.c b/src/nvidia/src/kernel/compute/imex_session_api.c
index 1eaac97db..091735401 100644
--- a/src/nvidia/src/kernel/compute/imex_session_api.c
+++ b/src/nvidia/src/kernel/compute/imex_session_api.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -339,7 +339,7 @@ imexsessionapiDestruct_IMPL
     if (_checkDanglingExports(RES_GET_CLIENT(pImexSessionApi)))
     {
         fabricDisableMemAlloc(pFabric);
-        NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, disabled fabric allocations!\n");
+        NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, disabled fabric allocations\n");
     }
 
     // Invalidate export cache to block future imports on this node ID.
@@ -361,7 +361,7 @@ imexsessionapiDestruct_IMPL
     {
         if (rcAndDisableOutstandingClientsWithImportedMemory(NULL, NV_FABRIC_INVALID_NODE_ID))
         {
-            NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, robust channel recovery invoked!\n");
+            NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, robust channel recovery invoked\n");
         }
     }
 }
@@ -426,7 +426,10 @@ imexsessionapiCtrlCmdDisableImporters_IMPL
         return NV_ERR_NOT_SUPPORTED;
 
     if (rcAndDisableOutstandingClientsWithImportedMemory(NULL, pParams->nodeId))
-        NV_PRINTF(LEVEL_ERROR, "nvidia-imex daemon has invoked robust channel recovery!\n");
+    {
+        NV_PRINTF(LEVEL_ERROR, "nvidia-imex daemon has invoked robust channel recovery for remote node: %u\n",
+                  pParams->nodeId);
+    }
 
     return NV_OK;
 }
diff --git a/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c b/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c
index 55b8f5eaf..d3ff54c50 100644
--- a/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c
+++ b/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -22,6 +22,7 @@ */ #include "gpu/gpu.h" +#include "gpu/gpu_access.h" #include "gpu/gpu_child_class_defs.h" #include "os/os.h" #include "nverror.h" @@ -102,7 +103,7 @@ gpuReadPassThruConfigReg_GH100 NvU32 *pData ) { - *pData = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_EP_PCFGM) + index); + *pData = GPU_REG_RD32_UNCHECKED(pGpu, DEVICE_BASE(NV_EP_PCFGM) + index); return NV_OK; } diff --git a/src/nvidia/src/kernel/gpu/conf_compute/ccsl.c b/src/nvidia/src/kernel/gpu/conf_compute/ccsl.c index 0ce41c854..883fd4e8d 100644 --- a/src/nvidia/src/kernel/gpu/conf_compute/ccsl.c +++ b/src/nvidia/src/kernel/gpu/conf_compute/ccsl.c @@ -447,6 +447,7 @@ ccslContextInitViaChannel_IMPL } *ppCtx = pCtx; + pCtx->msgCounterSize = CSL_MSG_CTR_32; pCtx->openrmCtx = NULL; pCtx->pDecryptBundles = NULL; @@ -505,7 +506,6 @@ ccslContextInitViaChannel_IMPL pCtx->pEncStatsBuffer = pKernelChannel->pEncStatsBuf; pCtx->pMemDesc = pMemDesc; - pCtx->msgCounterSize = CSL_MSG_CTR_32; // Set values only used for GSP keys to invalid pCtx->globalKeyIdIn = CC_GKEYID_GEN(CC_KEYSPACE_SIZE, 0); diff --git a/src/nvidia/src/kernel/gpu/disp/kern_disp.c b/src/nvidia/src/kernel/gpu/disp/kern_disp.c index 7adf9e5c8..4178df11e 100644 --- a/src/nvidia/src/kernel/gpu/disp/kern_disp.c +++ b/src/nvidia/src/kernel/gpu/disp/kern_disp.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -107,6 +107,7 @@ kdispConstructEngine_IMPL(OBJGPU *pGpu, ENGDESCRIPTOR engDesc) { NV_STATUS status; + NvU32 data; // // NOTE: DO NOT call IpVersion _HAL functions in ConstructEngine. @@ -145,6 +146,20 @@ kdispConstructEngine_IMPL(OBJGPU *pGpu, pKernelDisplay->pLowLatencySpinLock = (PORT_SPINLOCK *) portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged()); NV_ASSERT_OR_RETURN((pKernelDisplay->pLowLatencySpinLock != NULL), NV_ERR_INSUFFICIENT_RESOURCES); + if ((osReadRegistryDword(pGpu, NV_REG_INTERNAL_PANEL_DISCONNECTED, &data) == NV_OK) + && (data == NV_REG_INTERNAL_PANEL_DISCONNECTED_ENABLE)) + { + pKernelDisplay->setProperty(pKernelDisplay, + PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED, + NV_TRUE); + } + else + { + pKernelDisplay->setProperty(pKernelDisplay, + PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED, + NV_FALSE); + } + return status; } @@ -347,14 +362,17 @@ kdispInitBrightcStateLoad_IMPL(OBJGPU *pGpu, portMemSet(pBrightcInfo, 0, sizeof(*pBrightcInfo)); pBrightcInfo->status = status; - if ((pKernelDisplay != NULL) && (pKernelDisplay->pStaticInfo->internalDispActiveMask != 0) && !bInternalSkuFuseEnabled) + if ((pKernelDisplay != NULL) + && (pKernelDisplay->pStaticInfo->internalDispActiveMask != 0) + && !(bInternalSkuFuseEnabled + || (pKernelDisplay->getProperty(pKernelDisplay, PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED)))) { // Fill in the Backlight Method Data. 
pBrightcInfo->backLightDataSize = sizeof(pBrightcInfo->backLightData); status = osCallACPI_DSM(pGpu, ACPI_DSM_FUNCTION_CURRENT, NV_ACPI_GENERIC_FUNC_GETBACKLIGHT, (NvU32 *)(pBrightcInfo->backLightData), &pBrightcInfo->backLightDataSize); - pBrightcInfo->status = status; + pBrightcInfo->status = status; } status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice, @@ -1367,6 +1385,11 @@ kdispServiceLowLatencyIntrs_KERNEL for (i = 0; i < kdispGetNumHeads(pKernelDisplay); i++) { pKernelHead = KDISP_GET_HEAD(pKernelDisplay, i); + // Only reset the heads which we have serviced. + if ((pending & NVBIT(i)) == 0) + { + continue; + } kheadResetPendingLastData_HAL(pGpu, pKernelHead, pThreadState); } } diff --git a/src/nvidia/src/kernel/gpu/fifo/arch/volta/kernel_channel_group_gv100.c b/src/nvidia/src/kernel/gpu/fifo/arch/volta/kernel_channel_group_gv100.c index 2457fe0d3..88c188763 100644 --- a/src/nvidia/src/kernel/gpu/fifo/arch/volta/kernel_channel_group_gv100.c +++ b/src/nvidia/src/kernel/gpu/fifo/arch/volta/kernel_channel_group_gv100.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -52,6 +52,7 @@ kchangrpAllocFaultMethodBuffers_GV100 HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf = NULL; NvU32 gfid = pKernelChannelGroup->gfid; TRANSFER_SURFACE surf = {0}; + NvBool bReUseInitMem = pGpu->getProperty(pGpu, PDB_PROP_GPU_REUSE_INIT_CONTING_MEM); // // Allocate method buffer if applicable @@ -91,6 +92,7 @@ kchangrpAllocFaultMethodBuffers_GV100 { // Get the right aperture/attribute faultBufApert = ADDR_SYSMEM; + faultBufAttr = NV_MEMORY_CACHED; memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _FAULT_METHOD_BUFFER, pGpu->instLocOverrides3), "fault method buffer", &faultBufApert, &faultBufAttr); @@ -103,6 +105,7 @@ kchangrpAllocFaultMethodBuffers_GV100 { pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[index]); +retryInFB: // Allocate and initialize MEMDESC status = memdescCreate(&(pFaultMthdBuf->pMemDesc), pGpu, bufSizeInBytes, 0, NV_TRUE, faultBufApert, faultBufAttr, memDescFlags); @@ -112,13 +115,19 @@ kchangrpAllocFaultMethodBuffers_GV100 goto fail; } - memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_34, + memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_34, pFaultMthdBuf->pMemDesc); if (status != NV_OK) { - DBG_BREAKPOINT(); memdescDestroy(pFaultMthdBuf->pMemDesc); pFaultMthdBuf->pMemDesc = NULL; + if (bReUseInitMem && (faultBufApert == ADDR_SYSMEM)) + { + faultBufApert = ADDR_FBMEM; + memDescFlags |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE; + goto retryInFB; + } + DBG_BREAKPOINT(); goto fail; } diff --git a/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c b/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c index a69bfe7be..6b1f694b5 100644 --- a/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c +++ b/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c @@ -963,7 +963,6 @@ kfspGetGspBootArgs ) { NV_STATUS status = NV_OK; - ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu); NV_ASSERT(pCC != NULL); @@ -1004,6 +1003,7 @@ kfspSetupGspImages PBINDATA_STORAGE pGspImageHash; PBINDATA_STORAGE pGspImageSignature; PBINDATA_STORAGE pGspImagePublicKey; + NvBool bReUseInitMem = pGpu->getProperty(pGpu, 
PDB_PROP_GPU_REUSE_INIT_CONTING_MEM);
     NvU32 pGspImageSize;
     NvU32 pGspImageMapSize;
     NvP64 pVaKernel = NULL;
@@ -1040,15 +1040,17 @@ kfspSetupGspImages
     pGspImageSize = bindataGetBufferSize(pGspImage);
     pGspImageMapSize = NV_ALIGN_UP(pGspImageSize, 0x1000);
 
+    if ((pKernelFsp->pGspFmcMemdesc == NULL) || !bReUseInitMem)
+    {
+        NV_ASSERT(pKernelFsp->pGspFmcMemdesc == NULL); // If this assert fires, the existing memdesc is overwritten and leaked (a zombie).
+        status = memdescCreate(&pKernelFsp->pGspFmcMemdesc, pGpu, pGspImageMapSize,
+                               0, NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED, flags);
+        NV_ASSERT_OR_GOTO(status == NV_OK, failed);
-
-    status = memdescCreate(&pKernelFsp->pGspFmcMemdesc, pGpu, pGspImageMapSize,
-                           0, NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED, flags);
-    NV_ASSERT_OR_GOTO(status == NV_OK, failed);
-
-    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_7,
-                  pKernelFsp->pGspFmcMemdesc);
-    NV_ASSERT_OR_GOTO(status == NV_OK, failed);
-
+        memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_7,
+                      pKernelFsp->pGspFmcMemdesc);
+        NV_ASSERT_OR_GOTO(status == NV_OK, failed);
+    }
     status = memdescMap(pKernelFsp->pGspFmcMemdesc, 0, pGspImageMapSize, NV_TRUE,
                         NV_PROTECT_READ_WRITE, &pVaKernel, &pPrivKernel);
     NV_ASSERT_OR_GOTO(status == NV_OK, failed);
@@ -1301,6 +1303,7 @@ kfspPrepareBootCommands_GH100
     NvP64 pVaKernel = NULL;
     NvP64 pPrivKernel = NULL;
     NvBool bIsKeepWPRGc6 = IS_GPU_GC6_STATE_EXITING(pGpu);
+    NvBool bReUseInitMem = pGpu->getProperty(pGpu, PDB_PROP_GPU_REUSE_INIT_CONTING_MEM);
 
     statusBoot = kfspWaitForSecureBoot_HAL(pGpu, pKernelFsp);
 
@@ -1383,13 +1386,17 @@
     // FSP (an unit inside GPU) and hence placed in unprotected sysmem
     //
     flags = MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;
-    status = memdescCreate(&pKernelFsp->pSysmemFrtsMemdesc, pGpu, frtsSize,
-                           0, NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED, flags);
-    NV_ASSERT_OR_GOTO(status == NV_OK, failed);
+    if ((pKernelFsp->pSysmemFrtsMemdesc == NULL) || !bReUseInitMem)
+    {
+        NV_ASSERT(pKernelFsp->pSysmemFrtsMemdesc == NULL); // If this assert fires, the existing memdesc is overwritten and leaked (a zombie).
+ status = memdescCreate(&pKernelFsp->pSysmemFrtsMemdesc, pGpu, frtsSize, + 0, NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED, flags); + NV_ASSERT_OR_GOTO(status == NV_OK, failed); - memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_8, - pKernelFsp->pSysmemFrtsMemdesc); - NV_ASSERT_OR_GOTO(status == NV_OK, failed); + memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_8, + pKernelFsp->pSysmemFrtsMemdesc); + NV_ASSERT_OR_GOTO(status == NV_OK, failed); + } // Set up a kernel mapping for future use in RM status = memdescMap(pKernelFsp->pSysmemFrtsMemdesc, 0, frtsSize, NV_TRUE, diff --git a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c index 7cfd3b050..a0c0a1cfc 100644 --- a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c +++ b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c @@ -248,7 +248,6 @@ kfspStateUnload_IMPL NvU32 flags ) { - kfspReleaseProxyImage(pGpu, pKernelFsp); return NV_OK; } diff --git a/src/nvidia/src/kernel/gpu/gpu.c b/src/nvidia/src/kernel/gpu/gpu.c index 2bf0a63fa..a9c01fb49 100644 --- a/src/nvidia/src/kernel/gpu/gpu.c +++ b/src/nvidia/src/kernel/gpu/gpu.c @@ -6437,7 +6437,11 @@ gpuLogOobXidMessage_KERNEL { RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); NV_STATUS status; + NvBool bGspFatalError = NV_FALSE; NV2080_CTRL_INTERNAL_LOG_OOB_XID_PARAMS params = {0}; + KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu); + + bGspFatalError = pKernelGsp->bFatalError; // Exclude conditions that indicate issues with GSP communication. if ((xid == GSP_ERROR) || @@ -6447,7 +6451,8 @@ gpuLogOobXidMessage_KERNEL !pGpu->gspRmInitialized || pGpu->getProperty(pGpu, PDB_PROP_GPU_PREPARING_FULLCHIP_RESET) || pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST) || - !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_CONNECTED)) + !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_CONNECTED) || + bGspFatalError) { return; } diff --git a/src/nvidia/src/kernel/gpu/gpu_registry.c b/src/nvidia/src/kernel/gpu/gpu_registry.c index d7651cdd5..65d7303ee 100644 --- a/src/nvidia/src/kernel/gpu/gpu_registry.c +++ b/src/nvidia/src/kernel/gpu/gpu_registry.c @@ -225,6 +225,12 @@ gpuInitRegistryOverrides_KERNEL pGpu->userSharedData.pollingFrequencyMs = NV_REG_STR_RM_RUSD_POLLING_INTERVAL_DEFAULT; pGpu->userSharedData.bPollFrequencyOverridden = NV_FALSE; } + + if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_INIT_MEM_REUSE, &data32) == NV_OK) && + (data32 == NV_REG_STR_RM_INIT_MEM_REUSE_DISABLE)) + { + pGpu->setProperty(pGpu, PDB_PROP_GPU_REUSE_INIT_CONTING_MEM, NV_FALSE); + } return NV_OK; } diff --git a/src/nvidia/src/kernel/gpu/gsp/arch/blackwell/kernel_gsp_gb100.c b/src/nvidia/src/kernel/gpu/gsp/arch/blackwell/kernel_gsp_gb100.c index 4291413dd..27a87a879 100644 --- a/src/nvidia/src/kernel/gpu/gsp/arch/blackwell/kernel_gsp_gb100.c +++ b/src/nvidia/src/kernel/gpu/gsp/arch/blackwell/kernel_gsp_gb100.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -169,6 +169,8 @@ kgspServiceFatalHwError_GB100
         NV_PRINTF(LEVEL_ERROR, "NV_PGSP_FALCON_IRQSTAT_FATAL_ERROR PENDING error_code 0x%x\n", errorCode);
         MODS_ARCH_ERROR_PRINTF("NV_PGSP_FALCON_IRQSTAT_FATAL_ERROR=0x%x\n", errorCode);
 
+    pKernelGsp->bFatalError = NV_TRUE;
+
     // Poison error
     if (FLD_TEST_DRF(_PGSP, _RISCV_FAULT_CONTAINMENT_SRCSTAT, _GLOBAL_MEM, _FAULTED, errorCode))
     {
@@ -190,9 +192,8 @@
     else
     {
         nvErrorLog_va((void *)pGpu, ROBUST_CHANNEL_CONTAINED_ERROR, "GSP-RISCV instance 0 fatal error");
+        NV_ASSERT_OK(gpuMarkDeviceForReset(pGpu));
     }
-    pKernelGsp->bFatalError = NV_TRUE;
 
     kgspRcAndNotifyAllChannels(pGpu, pKernelGsp, ROBUST_CHANNEL_CONTAINED_ERROR, NV_TRUE);
-    NV_ASSERT_OK(gpuMarkDeviceForReset(pGpu));
 }
diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c
index 55bd541c0..ef302c777 100644
--- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c
@@ -375,6 +375,13 @@ memmgrTestCeUtils
     NvU32 vidmemData = 0xAABBCCDD;
     NvU32 sysmemData = 0x11223345;
     NV_STATUS status;
+    NvBool bReUseInitMem = pGpu->getProperty(pGpu, PDB_PROP_GPU_REUSE_INIT_CONTING_MEM);
+
+    // Under low-sysmem configurations we skip this test since it requires extra memory
+    if (bReUseInitMem)
+    {
+        return NV_OK;
+    }
 
     NV_ASSERT_OR_RETURN(pMemoryManager->pCeUtils != NULL, NV_ERR_INVALID_STATE);
diff --git a/src/nvidia/src/kernel/gpu/mmu/gmmu_walk.c b/src/nvidia/src/kernel/gpu/mmu/gmmu_walk.c
index 3ad52bc57..e4bc7fe99 100644
--- a/src/nvidia/src/kernel/gpu/mmu/gmmu_walk.c
+++ b/src/nvidia/src/kernel/gpu/mmu/gmmu_walk.c
@@ -328,16 +328,18 @@ _gmmuWalkCBLevelAlloc
         {
             case ADDR_FBMEM:
                 if (RMCFG_FEATURE_PMA &&
-                    (pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED) &&
-                    (pGVAS->pPageTableMemPool != NULL))
+                    (pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED))
                 {
+                    NV_ASSERT_OR_RETURN(pUserCtx->pGpuState->pPageTableMemPool != NULL,
+                                        NV_ERR_INVALID_STATE);
+
                     pMemDescTemp->ActualSize = RM_ALIGN_UP(newMemSize, alignment);
-                    status = rmMemPoolAllocate(pGVAS->pPageTableMemPool,
+                    status = rmMemPoolAllocate(pUserCtx->pGpuState->pPageTableMemPool,
                                                (RM_POOL_ALLOC_MEMDESC*)pMemDescTemp);
                     break;
                 }
             case ADDR_SYSMEM:
-                memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_143, 
+                memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_143,
                                 pMemDescTemp);
                 break;
             default:
@@ -609,13 +611,11 @@ _gmmuWalkCBLevelFree
     }
     else
     {
-        if (RMCFG_FEATURE_PMA &&
-            (pUserCtx->pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED) &&
-            (pMemDesc[i]->pPageHandleList != NULL) &&
+        if ((pMemDesc[i]->pPageHandleList != NULL) &&
             (listCount(pMemDesc[i]->pPageHandleList) != 0) &&
-            (pUserCtx->pGVAS->pPageTableMemPool != NULL))
+            (pUserCtx->pGpuState->pPageTableMemPool != NULL))
         {
-            rmMemPoolFree(pUserCtx->pGVAS->pPageTableMemPool,
+            rmMemPoolFree(pUserCtx->pGpuState->pPageTableMemPool,
                           (RM_POOL_ALLOC_MEMDESC*)pMemDesc[i],
                           pUserCtx->pGVAS->flags);
         }
diff --git a/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c b/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c
index 41927931f..9feef391a 100644
--- a/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c
+++ b/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c
@@ -111,6 +111,10 @@ subdeviceCtrlCmdPerfGetGpumonPerfmonUtilSamplesV2_KERNEL
             pParams,
             sizeof(*pParams)));
 
+    // Skip translation if the request is from the root namespace.
+    if (osIsInitNs())
+        return NV_OK;
+
     // Now translate the sample's procId.
    numEntries = pParams->bufSize / sizeof (NV2080_CTRL_PERF_GPUMON_PERFMON_UTIL_SAMPLE);
 
     NV_ASSERT_OR_RETURN(numEntries <= NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL,
diff --git a/src/nvidia/src/kernel/mem_mgr/gpu_vaspace.c b/src/nvidia/src/kernel/mem_mgr/gpu_vaspace.c
index f4fb8edc2..d9aaf2d7a 100644
--- a/src/nvidia/src/kernel/mem_mgr/gpu_vaspace.c
+++ b/src/nvidia/src/kernel/mem_mgr/gpu_vaspace.c
@@ -344,25 +344,6 @@ _gvaspaceReserveVaForClientRm
                                       pGVAS->vaLimitServerRMOwned);
     NV_ASSERT_OR_GOTO(status == NV_OK, done);
 
-    if (pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED)
-    {
-        // Loop over each GPU associated with VAS.
-        FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
-        {
-            MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
-
-            if (pMemoryManager->pPageLevelReserve == NULL)
-            {
-                NV_ASSERT(0);
-                status = NV_ERR_INVALID_STATE;
-                break;
-            }
-        }
-        FOR_EACH_GPU_IN_MASK_UC_END
-
-        NV_ASSERT_OR_GOTO(status == NV_OK, done);
-    }
-
     // Loop over each GPU associated with VAS.
     FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
     {
@@ -1008,19 +989,6 @@ gvaspaceDestruct_IMPL
         }
         FOR_EACH_GPU_IN_MASK_UC_END
 
-        FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
-        {
-            MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
-
-            if (RMCFG_FEATURE_PMA &&
-                pMemoryManager->pPageLevelReserve != NULL)
-            {
-                if (pGVAS->pPageTableMemPool != NULL)
-                    rmMemPoolRelease(pGVAS->pPageTableMemPool, pGVAS->flags);
-            }
-        }
-        FOR_EACH_GPU_IN_MASK_UC_END
-
         portMemFree(pGVAS->pGpuStates);
         pGVAS->pGpuStates = NULL;
     }
@@ -1092,6 +1060,29 @@ _gvaspaceGpuStateConstruct
     // Must be in UC.
     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
 
+    if (RMCFG_FEATURE_PMA &&
+        (flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED))
+    {
+        MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
+        CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
+        RsResourceRef *pDeviceRef;
+        Device *pDevice;
+
+        NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE);
+
+        pDeviceRef = pCallContext->pResourceRef;
+        if (pDeviceRef->internalClassId != classId(Device))
+        {
+            NV_ASSERT_OK_OR_RETURN(refFindAncestorOfType(pDeviceRef, classId(Device), &pDeviceRef));
+        }
+
+        pDevice = dynamicCast(pDeviceRef->pResource, Device);
+        NV_ASSERT_OR_RETURN(pDevice != NULL, NV_ERR_INVALID_STATE);
+
+        NV_ASSERT_OK_OR_RETURN(
+            memmgrPageLevelPoolsGetInfo(pGpu, pMemoryManager, pDevice, &pGpuState->pPageTableMemPool));
+    }
+
     // Get GMMU format for this GPU.
     pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, reqBigPageSize);
     NV_ASSERT_OR_RETURN(NULL != pFmt, NV_ERR_NOT_SUPPORTED);
@@ -1156,7 +1147,6 @@
         vaLimitInt = vaLimitExt;
     }
 
-
     //
     // Shared management external limit is aligned to root PDE coverage.
     // This allows KMD/OS to hook external PDEs beneath an RM-allocated root.
@@ -1267,6 +1257,10 @@ _gvaspaceGpuStateDestruct
     _gvaspaceForceFreePageLevelInstances(pGVAS, pGpu, pGpuState);
 
     mmuWalkDestroy(pGpuState->pWalk);
+
+    if (pGpuState->pPageTableMemPool != NULL)
+        rmMemPoolRelease(pGpuState->pPageTableMemPool, pGVAS->flags);
+
     pGpuState->pWalk = NULL;
 
     NV_ASSERT(NULL == pGpuState->pMirroredRoot);
@@ -3311,7 +3305,7 @@ gvaspaceExternalRootDirRevoke_IMPL
     // Free the RM memory used to hold the memdesc struct.
memdescDestroy(pMemDesc); - + return status; } @@ -5278,86 +5272,69 @@ gvaspaceReserveMempool_IMPL NvU32 flags ) { - NV_STATUS status = NV_OK; - RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool = NULL; + NvBool bRetryInSys = !!(pGVAS->flags & VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS); + GVAS_GPU_STATE *pGpuState; + KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); + const GMMU_FMT *pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0); + NV_STATUS status; + NvU64 poolSize; - if (RMCFG_FEATURE_PMA && - pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED) + if ((pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED) == 0) + return NV_OK; + + pGpuState = gvaspaceGetGpuState(pGVAS, pGpu); + if ((pGpuState == NULL) || + (pGpuState->pPageTableMemPool == NULL)) + return NV_OK; + + // + // Always assume worst case of 4K mapping even if client has + // requested bigger page size. This is to ensure that we have + // sufficient memory in pools. Some MODS tests query for free + // framebuffer and allocate the entire available. In such cases + // we can run into OOM errors during page table allocation when + // the test tries to map a big surface and the pools are short + // of memory. + // + if (ONEBITSET(pageSizeLockMask)) { - KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); - MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); - const GMMU_FMT *pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0); - // - // Always assume worst case of 4K mapping even if client has - // requested bigger page size. This is to ensure that we have - // sufficient memory in pools. Some MODS tests query for free - // framebuffer and allocate the entire available. In such cases - // we can run into OOM errors during page table allocation when - // the test tries to map a big surface and the pools are short - // of memory. + // There is a requirement of serial ATS enabled vaspaces to have + // both small and big page tables allocated at the same time. This + // is required for the 4K not valid feature. This is irrespective + // of the actual page size requested by the client. // - if (ONEBITSET(pageSizeLockMask)) + if (gvaspaceIsAtsEnabled(pGVAS)) { - // - // There is a requirement of serial ATS enabled vaspaces to have - // both small and big page tables allocated at the same time. This - // is required for the 4K not valid feature. This is irrespective - // of the actual page size requested by the client. 
- // - if (gvaspaceIsAtsEnabled(pGVAS)) - { - pageSizeLockMask = RM_PAGE_SIZE | pGVAS->bigPageSize; - } - else if (!(flags & VASPACE_RESERVE_FLAGS_ALLOC_UPTO_TARGET_LEVEL_ONLY)) - { - pageSizeLockMask = RM_PAGE_SIZE; - } + pageSizeLockMask = RM_PAGE_SIZE | pGVAS->bigPageSize; } - else + else if (!(flags & VASPACE_RESERVE_FLAGS_ALLOC_UPTO_TARGET_LEVEL_ONLY)) { - NV_ASSERT_OR_RETURN(((pageSizeLockMask & RM_PAGE_SIZE) != 0), - NV_ERR_INVALID_ARGUMENT); + pageSizeLockMask = RM_PAGE_SIZE; } + } + else + { + NV_ASSERT_OR_RETURN(((pageSizeLockMask & RM_PAGE_SIZE) != 0), + NV_ERR_INVALID_ARGUMENT); + } - NvU64 poolSize = kgmmuGetSizeOfPageDirs(pGpu, pKernelGmmu, pFmt, 0, size - 1, - pageSizeLockMask) + - kgmmuGetSizeOfPageTables(pGpu, pKernelGmmu, pFmt, 0, size - 1, - pageSizeLockMask); + poolSize = kgmmuGetSizeOfPageDirs(pGpu, pKernelGmmu, pFmt, 0, size - 1, pageSizeLockMask) + + kgmmuGetSizeOfPageTables(pGpu, pKernelGmmu, pFmt, 0, size - 1, pageSizeLockMask); - NV_ASSERT_OK_OR_RETURN(memmgrPageLevelPoolsGetInfo(pGpu, pMemoryManager, pDevice, &pMemPool)); - status = rmMemPoolReserve(pMemPool, poolSize, pGVAS->flags); - if ((pGVAS->flags & VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS) && - (status == NV_ERR_NO_MEMORY)) - { - // - // It is okay to change the status to NV_OK here since it is understood that - // we may run out of video memory at some time. The RETRY_PTE_ALLOC_IN_SYS - // flag ensures that RM retries allocating the page tables in sysmem if such - // a situation arises. So, running out of video memory here need not be fatal. - // It may be fatal if allocation in sysmem also fails. In that case RM will - // return an error from elsewhere. - // - status = NV_OK; - } - else - { - NV_ASSERT_OR_RETURN((NV_OK == status), status); + status = rmMemPoolReserve(pGpuState->pPageTableMemPool, poolSize, pGVAS->flags); - // setup page table pool in VA space if reservation to pool succeeds - if (pGVAS->pPageTableMemPool != NULL) - { - if (pGVAS->pPageTableMemPool != pMemPool) - { - rmMemPoolRelease(pMemPool, pGVAS->flags); - NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE); - } - } - else - { - pGVAS->pPageTableMemPool = pMemPool; - } - } + if ((status == NV_ERR_NO_MEMORY) && bRetryInSys) + { + // + // It is okay to change the status to NV_OK here since it is understood that + // we may run out of video memory at some time. The RETRY_PTE_ALLOC_IN_SYS + // flag ensures that RM retries allocating the page tables in sysmem if such + // a situation arises. So, running out of video memory here need not be fatal. + // It may be fatal if allocation in sysmem also fails. In that case RM will + // return an error from elsewhere. + // + status = NV_OK; } return status; diff --git a/src/nvidia/src/kernel/platform/acpi_common.c b/src/nvidia/src/kernel/platform/acpi_common.c index ec15f9a42..8403694f9 100644 --- a/src/nvidia/src/kernel/platform/acpi_common.c +++ b/src/nvidia/src/kernel/platform/acpi_common.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2000-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2000-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -40,6 +40,9 @@ #include "mxm_spec.h" #include "gpu/gsp/gsp_static_config.h" #include "platform/nbsi/nbsi_read.h" +#include "nvrm_registry.h" + +#include "gpu/disp/kern_disp.h" // // DSM ACPI Routines common routines for Linux @@ -706,6 +709,17 @@ checkDsmCall NV_ASSERT_OR_RETURN(pInOut, NV_ERR_INVALID_ARGUMENT); NV_ASSERT_OR_RETURN(pSize, NV_ERR_INVALID_ARGUMENT); + KernelDisplay *pKernelDisplay = GPU_GET_KERNEL_DISPLAY(pGpu); + + if (pKernelDisplay != NULL + && pKernelDisplay->getProperty(pKernelDisplay, PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED)) + { + if (*pAcpiDsmFunction == ACPI_DSM_FUNCTION_NBCI) + { + return NV_ERR_NOT_SUPPORTED; + } + } + // Do any remapping of subfunction if function is current if (remapDsmFunctionAndSubFunction(pGpu, pAcpiDsmFunction, pAcpiDsmSubFunction) != NV_OK) { diff --git a/src/nvidia/src/kernel/rmapi/client_resource.c b/src/nvidia/src/kernel/rmapi/client_resource.c index 10c81565a..be546d0c5 100644 --- a/src/nvidia/src/kernel/rmapi/client_resource.c +++ b/src/nvidia/src/kernel/rmapi/client_resource.c @@ -2556,6 +2556,7 @@ _controllerParseStaticTable_v22 switch (header.version) { + case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25: case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24: case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23: case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22: @@ -2594,10 +2595,12 @@ _controllerParseStaticTable_v22 } } + CONTROLLER_STATIC_TABLE_ENTRY_V22 entry = { 0 }; + // Parse each entry for (loop = 0; loop < header.entryCount; loop++) { - CONTROLLER_STATIC_TABLE_ENTRY_V22 entry = { 0 }; + portMemSet(&entry, 0, sizeof(entry)); NvU32 offset = header.headerSize + (loop * NVPCF_CONTROLLER_STATIC_TABLE_ENTRY_V22_SIZE_05); @@ -2626,6 +2629,28 @@ _controllerParseStaticTable_v22 } } + if (header.version == NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25) + { + switch(DRF_VAL(PCF_CONTROLLER_STATIC_TABLE_ENTRY_V25, _PARAM0, _CPU_TDP_TYPE,entry.flags0)) + { + case NVPCF_CONTROLLER_SBIOS_TABLE_CPU_TDP_CONTROL_DC_ONLY: + { + pParams->cpuTdpControlType = QBOOST_CPU_TDP_CONTROL_TYPE_DC_ONLY; + break; + } + case NVPCF_CONTROLLER_SBIOS_TABLE_CPU_TDP_CONTROL_DC_AC: + { + pParams->cpuTdpControlType = QBOOST_CPU_TDP_CONTROL_TYPE_DC_AC; + break; + } + default: + { + pParams->cpuTdpControlType = QBOOST_CPU_TDP_CONTROL_TYPE_DC_ONLY; + break; + } + } + } + pParams->version = (NvU8)header.version; pParams->samplingPeriodmS = CONTROLLER_GRP_DEFAULT_BASE_SAMPLING_PERIOD_MS; *pEntryCount = (NvU8)header.entryCount; diff --git a/src/nvidia/src/kernel/vgpu/rpc.c b/src/nvidia/src/kernel/vgpu/rpc.c index 633392484..4fb3ddee4 100644 --- a/src/nvidia/src/kernel/vgpu/rpc.c +++ b/src/nvidia/src/kernel/vgpu/rpc.c @@ -10260,6 +10260,8 @@ NV_STATUS rpcDumpProtobufComponent_v18_12 if (IS_GSP_CLIENT(pGpu)) { rpc_dump_protobuf_component_v18_12 *rpc_params = &rpc_message->dump_protobuf_component_v18_12; + const NvU32 fixed_param_size = sizeof(rpc_message_header_v) + sizeof(*rpc_params); + NV_ASSERT_OR_RETURN(fixed_param_size <= pRpc->maxRpcSize, NV_ERR_INVALID_STATE); status = rpcWriteCommonHeader(pGpu, pRpc, NV_VGPU_MSG_FUNCTION_DUMP_PROTOBUF_COMPONENT, sizeof(*rpc_params)); @@ -10271,7 +10273,7 @@ NV_STATUS rpcDumpProtobufComponent_v18_12 rpc_params->countOnly = ((pPrbEnc->flags & PRB_COUNT_ONLY) != 0); rpc_params->bugCheckCode = pNvDumpState->bugCheckCode; rpc_params->internalCode = pNvDumpState->internalCode; - rpc_params->bufferSize = NV_MIN(pRpc->maxRpcSize, prbEncBufLeft(pPrbEnc)); 
+ rpc_params->bufferSize = NV_MIN(pRpc->maxRpcSize - fixed_param_size, prbEncBufLeft(pPrbEnc)); status = _issueRpcAndWait(pGpu, pRpc); diff --git a/version.mk b/version.mk index 35d17728c..fa95f2d64 100644 --- a/version.mk +++ b/version.mk @@ -1,5 +1,5 @@ -NVIDIA_VERSION = 575.51.03 -NVIDIA_NVID_VERSION = 575.51.03 +NVIDIA_VERSION = 575.57.08 +NVIDIA_NVID_VERSION = 575.57.08 NVIDIA_NVID_EXTRA = # This file.
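For reference, the rpc.c change above fixes the payload sizing so the protobuf buffer can never overrun `maxRpcSize` once the message header and fixed parameters are accounted for. A self-contained sketch of that arithmetic with stand-in types; only the NV_MIN-style clamping and the guard mirror the source:

#include <stddef.h>
#include <stdint.h>

/*
 * Usable protobuf payload = transport limit minus the fixed overhead
 * (message header + per-call parameter struct), further clamped to the
 * space left in the encoder, as in:
 *   NV_MIN(pRpc->maxRpcSize - fixed_param_size, prbEncBufLeft(pPrbEnc))
 */
static uint32_t rpcUsablePayloadSize(uint32_t maxRpcSize,
                                     size_t   headerSize,
                                     size_t   paramsSize,
                                     size_t   encoderBytesLeft)
{
    size_t fixedParamSize = headerSize + paramsSize;
    size_t usable;

    if (fixedParamSize > maxRpcSize)
        return 0; /* mirrors the NV_ASSERT_OR_RETURN guard added above */

    usable = maxRpcSize - fixedParamSize;
    return (uint32_t)((usable < encoderBytesLeft) ? usable : encoderBytesLeft);
}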