From af31543aaa3352478414fc2392559af2c9ad871a Mon Sep 17 00:00:00 2001 From: Maneet Singh Date: Thu, 29 May 2025 22:59:14 -0700 Subject: [PATCH] 570.148.08 --- README.md | 7 +- kernel-open/Kbuild | 2 +- kernel-open/conftest.sh | 15 ++ kernel-open/nvidia-uvm/uvm_blackwell_host.c | 28 +++ kernel-open/nvidia-uvm/uvm_gpu.c | 25 +-- kernel-open/nvidia-uvm/uvm_gpu.h | 9 +- .../nvidia-uvm/uvm_gpu_access_counters.c | 193 ++++++++++-------- kernel-open/nvidia-uvm/uvm_hal.c | 7 +- kernel-open/nvidia-uvm/uvm_hal.h | 21 ++ kernel-open/nvidia-uvm/uvm_hal_types.h | 7 + kernel-open/nvidia-uvm/uvm_lock.c | 3 +- kernel-open/nvidia-uvm/uvm_lock.h | 8 +- kernel-open/nvidia-uvm/uvm_maxwell_host.c | 9 + kernel-open/nvidia-uvm/uvm_turing_host.c | 10 +- kernel-open/nvidia/nvidia.Kbuild | 1 + kernel-open/nvidia/os-interface.c | 6 +- .../displayport/src/dp_connectorimpl2x.cpp | 10 + src/common/inc/nvBldVer.h | 22 +- src/common/inc/nvUnixVersion.h | 2 +- src/common/nvswitch/kernel/ls10/intr_ls10.c | 3 +- src/common/nvswitch/kernel/ls10/minion_ls10.c | 1 - src/common/sdk/nvidia/inc/nvos.h | 1 + src/nvidia/generated/g_chipset_nvoc.h | 3 + src/nvidia/generated/g_gpu_access_nvoc.h | 8 + src/nvidia/generated/g_kern_disp_nvoc.h | 7 +- src/nvidia/generated/g_nv_name_released.h | 1 + src/nvidia/interface/nvrm_registry.h | 35 +++- src/nvidia/kernel/inc/gpuvideo/rmifvideng.h | 16 +- .../src/kernel/compute/imex_session_api.c | 11 +- .../kernel/gpu/arch/hopper/kern_gpu_gh100.c | 5 +- src/nvidia/src/kernel/gpu/disp/kern_disp.c | 24 ++- .../src/kernel/gpu/perf/kern_perf_ctrl.c | 4 + src/nvidia/src/kernel/gpu/rc/kernel_rc.c | 17 +- src/nvidia/src/kernel/platform/acpi_common.c | 16 +- src/nvidia/src/kernel/vgpu/rpc.c | 4 +- version.mk | 2 +- 36 files changed, 384 insertions(+), 159 deletions(-) diff --git a/README.md b/README.md index 28bb26757..6b2c73b84 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 570.144. +version 570.148.08. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -570.144 driver release. This can be achieved by installing +570.148.08 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -185,7 +185,7 @@ table below). For details on feature support and limitations, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/570.144/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/570.148.08/README/kernel_open.html For vGPU support, please refer to the README.vgpu packaged in the vGPU Host Package for more details. @@ -964,6 +964,7 @@ Subsystem Device ID. 
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C19 | | NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 | | NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 | +| NVIDIA GeForce RTX 5060 Ti | 2D04 | | NVIDIA GeForce RTX 5070 | 2F04 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index bf06edb75..afcfb536a 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc EXTRA_CFLAGS += -I$(src) EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.144\" +EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.148.08\" ifneq ($(SYSSRCHOST1X),) EXTRA_CFLAGS += -I$(SYSSRCHOST1X) diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index 20f694ecb..99484d826 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -3132,6 +3132,21 @@ compile_test() { compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types" ;; + has_enum_pidtype_tgid) + # Determine if PIDTYPE_TGID is present in the kernel as an enum + # + # Added by commit 6883f81aac6f ("pid: Implement PIDTYPE_TGID") + # in v4.19 + # + CODE=" + #include + + enum pid_type type = PIDTYPE_TGID; + " + + compile_check_conftest "$CODE" "NV_HAS_ENUM_PIDTYPE_TGID" "" "types" + ;; + vfio_pin_pages_has_vfio_device_arg) # # Determine if vfio_pin_pages() kABI accepts "struct vfio_device *" diff --git a/kernel-open/nvidia-uvm/uvm_blackwell_host.c b/kernel-open/nvidia-uvm/uvm_blackwell_host.c index a95a76f6c..130b73ecd 100644 --- a/kernel-open/nvidia-uvm/uvm_blackwell_host.c +++ b/kernel-open/nvidia-uvm/uvm_blackwell_host.c @@ -254,3 +254,31 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push, HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi)); } } + +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + if (parent_gpu->rm_info.accessCntrBufferCount > 1) { + NvU32 i; + + for (i = 0; i < num_entries; i++) { + const uvm_access_counter_buffer_entry_t *entry = buffer_entries[i]; + + // The LSb identifies the die ID. 
+ if ((entry->tag & 0x1) == 1) + return UVM_ACCESS_COUNTER_CLEAR_OP_ALL; + } + } + + return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; +} + +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; +} diff --git a/kernel-open/nvidia-uvm/uvm_gpu.c b/kernel-open/nvidia-uvm/uvm_gpu.c index 7344b9c1f..ebb35d3f6 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_gpu.c @@ -1197,6 +1197,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid, uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR); uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR); uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS); + uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS); + uvm_tracker_init(&parent_gpu->access_counters_clear_tracker); uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF); uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF); uvm_rb_tree_init(&parent_gpu->instance_ptr_table); @@ -1214,6 +1216,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid, return NV_OK; cleanup: + uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker); uvm_kvfree(parent_gpu); return status; @@ -1644,19 +1647,12 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu, // Sync the access counter clear tracker too. if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) { - NvU32 notif_buf_index; - for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) { - uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index]; + uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock); + status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker); + uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock); - if (access_counters->rm_info.accessCntrBufferHandle != 0) { - uvm_access_counters_isr_lock(access_counters); - status = uvm_tracker_wait(&access_counters->clear_tracker); - uvm_access_counters_isr_unlock(access_counters); - - if (status != NV_OK) - UVM_ASSERT(status == uvm_global_get_status()); - } - } + if (status != NV_OK) + UVM_ASSERT(status == uvm_global_get_status()); } } @@ -1787,6 +1783,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref) for_each_sub_processor_index(sub_processor_index) UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]); + uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker); + uvm_kvfree(parent_gpu); } @@ -2881,6 +2879,9 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid, if (status != NV_OK) goto error_unregister; + if (gpu_info->accessCntrBufferCount > 1) + gpu_info->accessCntrBufferCount = 1; + if (parent_gpu != NULL) { // If the UUID has been seen before, and if SMC is enabled, then check // if this specific partition has been seen previously. 
The UUID-based diff --git a/kernel-open/nvidia-uvm/uvm_gpu.h b/kernel-open/nvidia-uvm/uvm_gpu.h index 7e1b8110e..d0977f981 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu.h +++ b/kernel-open/nvidia-uvm/uvm_gpu.h @@ -522,10 +522,6 @@ struct uvm_access_counter_buffer_struct // PCIe NvU32 cached_put; - // Tracker used to aggregate access counters clear operations, needed for - // GPU removal - uvm_tracker_t clear_tracker; - // Current access counter configuration. During normal operation this // information is computed once during GPU initialization. However, tests // may override it to try different configuration values. @@ -1205,6 +1201,11 @@ struct uvm_parent_gpu_struct uvm_access_counter_buffer_t *access_counter_buffer; uvm_mutex_t access_counters_enablement_lock; + // Tracker used to aggregate access counters clear operations, needed for + // GPU removal. It is only used when supports_access_counters is set. + uvm_tracker_t access_counters_clear_tracker; + uvm_mutex_t access_counters_clear_tracker_lock; + // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs. NvU32 utlb_per_gpc_count; diff --git a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c index c1584ff9e..bac8dce25 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c @@ -216,38 +216,19 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran return NV_OK; } -// Clear the access counter notifications and add it to the per-GPU -// per-notification-buffer clear tracker. -static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu, - uvm_access_counter_buffer_t *access_counters, - uvm_access_counter_buffer_entry_t **notification_start, - NvU32 num_notifications) +static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu) { - NvU32 i; NV_STATUS status; - uvm_push_t push; - status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch"); - if (status != NV_OK) { - UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n", - nvstatusToString(status), - uvm_gpu_name(gpu), - access_counters->index); - return status; - } + uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock); + status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker); + uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock); - for (i = 0; i < num_notifications; i++) - gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]); - - uvm_push_end(&push); - - uvm_tracker_remove_completed(&access_counters->clear_tracker); - - return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push); + return status; } -// Clear all access counters and add the operation to the per-GPU -// per-notification-buffer clear tracker +// Clear all access counters and add the operation to the per-GPU clear +// tracker. 
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buffer_t *access_counters) { NV_STATUS status; @@ -269,8 +250,52 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf uvm_push_end(&push); - uvm_tracker_remove_completed(&access_counters->clear_tracker); - return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push); + uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock); + uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker); + status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push); + uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock); + + return status; +} + +// Clear the access counter notifications and add it to the per-GPU clear +// tracker. +static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu, + uvm_access_counter_buffer_t *access_counters, + uvm_access_counter_buffer_entry_t **notification_start, + NvU32 num_notifications) +{ + NvU32 i; + NV_STATUS status; + uvm_push_t push; + uvm_access_counter_clear_op_t clear_op; + + clear_op = gpu->parent->host_hal->access_counter_query_clear_op(gpu->parent, notification_start, num_notifications); + if (clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_ALL) + return access_counter_clear_all(gpu, access_counters); + + UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED); + + status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch"); + if (status != NV_OK) { + UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n", + nvstatusToString(status), + uvm_gpu_name(gpu), + access_counters->index); + return status; + } + + for (i = 0; i < num_notifications; i++) + gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]); + + uvm_push_end(&push); + + uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock); + uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker); + status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push); + uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock); + + return status; } bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index) @@ -373,8 +398,6 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3 access_counters->notifications_ignored_count = 0; access_counters->test.reconfiguration_owner = NULL; - uvm_tracker_init(&access_counters->clear_tracker); - access_counters->max_notifications = access_counters->rm_info.bufferSize / parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu); @@ -442,8 +465,6 @@ void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 n UVM_ASSERT(status == NV_OK); access_counters->rm_info.accessCntrBufferHandle = 0; - uvm_tracker_deinit(&access_counters->clear_tracker); - uvm_kvfree(batch_context->notification_cache); uvm_kvfree(batch_context->notifications); batch_context->notification_cache = NULL; @@ -487,7 +508,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, NvU32 index, con if (status != NV_OK) goto error; - status = uvm_tracker_wait(&access_counters->clear_tracker); + status = parent_gpu_clear_tracker_wait(gpu->parent); if (status != NV_OK) goto error; @@ -521,7 +542,7 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32 UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[index].service_lock)); // Wait for any 
pending clear operation before releasing ownership - status = uvm_tracker_wait(&access_counters->clear_tracker); + status = parent_gpu_clear_tracker_wait(parent_gpu); if (status != NV_OK) UVM_ASSERT(status == uvm_global_get_status()); @@ -1750,28 +1771,21 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS uvm_va_space_up_read(va_space); for_each_gpu_in_mask(gpu, retained_gpus) { - NvU32 notif_buf_index; + uvm_access_counter_buffer_t *access_counters; if (!gpu->parent->access_counters_supported) continue; - for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) { - uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, - notif_buf_index); - uvm_access_counters_isr_lock(access_counters); + // clear_all affects all the notification buffers, we issue it for + // the notif_buf_index 0. + access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, 0); + status = access_counter_clear_all(gpu, access_counters); + if (status == NV_OK) + status = parent_gpu_clear_tracker_wait(gpu->parent); - // Access counters are not enabled. Nothing to clear. - if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count) { - status = access_counter_clear_all(gpu, access_counters); - if (status == NV_OK) - status = uvm_tracker_wait(&access_counters->clear_tracker); - } - - uvm_access_counters_isr_unlock(access_counters); - - if (status != NV_OK) - break; - } + // Break the loop if clear_all failed in any of the retained gpus. + if (status != NV_OK) + break; } for_each_gpu_in_mask(gpu, retained_gpus) @@ -2054,7 +2068,9 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS * NV_STATUS status = NV_OK; uvm_gpu_t *gpu = NULL; uvm_va_space_t *va_space = uvm_va_space_get(filp); + uvm_access_counter_buffer_t *access_counters; NvU32 notif_buf_index; + NvBool index0_state; if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX) return NV_ERR_INVALID_ARGUMENT; @@ -2068,51 +2084,52 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS * goto exit_release_gpu; } - for (notif_buf_index = 0; - notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount && status == NV_OK; - notif_buf_index++) { - uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, - notif_buf_index); + uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock); - uvm_access_counters_isr_lock(access_counters); + // Access counters not enabled. Nothing to reset + if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) { + uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock); + goto exit_release_gpu; + } - // Access counters not enabled. 
Nothing to reset - if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0) - goto exit_isr_unlock; + uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock); - if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) { - status = access_counter_clear_all(gpu, access_counters); - } - else { - uvm_access_counter_buffer_entry_t entry = { 0 }; - uvm_access_counter_buffer_entry_t *notification = &entry; + // Clear operations affect all notification buffers, we use the + // notif_buf_index = 0; + notif_buf_index = 0; + access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, notif_buf_index); - entry.bank = params->bank; - entry.tag = params->tag; + uvm_access_counters_isr_lock(access_counters); - status = access_counter_clear_notifications(gpu, access_counters, ¬ification, 1); - } + // Recheck access counters are enabled. + index0_state = gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0; + if (index0_state) { + NvU32 i; - if (status == NV_OK) - status = uvm_tracker_wait(&access_counters->clear_tracker); + for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++) + UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state); + + goto exit_isr_unlock; + } + + if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) { + status = access_counter_clear_all(gpu, access_counters); + } + else { + uvm_access_counter_buffer_entry_t entry = { 0 }; + uvm_access_counter_buffer_entry_t *notification = &entry; + + entry.bank = params->bank; + entry.tag = params->tag; + + status = access_counter_clear_notifications(gpu, access_counters, ¬ification, 1); + } + + if (status == NV_OK) + status = parent_gpu_clear_tracker_wait(gpu->parent); exit_isr_unlock: - uvm_access_counters_isr_unlock(access_counters); - - // We only need to clear_all() once. - if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) { - NvU32 i; - - // Early exit of the main loop; since we only need to clear_all() - // once. Check that all the remaining notification buffers have - // access counters in same state. 
- NvBool index0_state = (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0); - for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++) - UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state); - - break; - } - } + uvm_access_counters_isr_unlock(access_counters); exit_release_gpu: uvm_gpu_release(gpu); diff --git a/kernel-open/nvidia-uvm/uvm_hal.c b/kernel-open/nvidia-uvm/uvm_hal.c index 720127b9a..200f477a4 100644 --- a/kernel-open/nvidia-uvm/uvm_hal.c +++ b/kernel-open/nvidia-uvm/uvm_hal.c @@ -218,6 +218,7 @@ static uvm_hal_class_ops_t host_table[] = .clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported, .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported, .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported, + .access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported, .get_time = uvm_hal_maxwell_get_time, } }, @@ -269,6 +270,7 @@ static uvm_hal_class_ops_t host_table[] = .tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test, .access_counter_clear_all = uvm_hal_turing_access_counter_clear_all, .access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted, + .access_counter_query_clear_op = uvm_hal_turing_access_counter_query_clear_op, } }, { @@ -308,12 +310,15 @@ static uvm_hal_class_ops_t host_table[] = .tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all, .tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va, .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test, + .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100, } }, { .id = BLACKWELL_CHANNEL_GPFIFO_B, .parent_id = BLACKWELL_CHANNEL_GPFIFO_A, - .u.host_ops = {} + .u.host_ops = { + .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb20x + } }, }; diff --git a/kernel-open/nvidia-uvm/uvm_hal.h b/kernel-open/nvidia-uvm/uvm_hal.h index 387bfebe3..cfe751928 100644 --- a/kernel-open/nvidia-uvm/uvm_hal.h +++ b/kernel-open/nvidia-uvm/uvm_hal.h @@ -703,6 +703,10 @@ typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *pa typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push); typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push, const uvm_access_counter_buffer_entry_t *buffer_entry); +typedef uvm_access_counter_clear_op_t + (*uvm_hal_access_counter_query_clear_op_t)(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters); void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters); @@ -719,6 +723,10 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push); void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push, const uvm_access_counter_buffer_entry_t *buffer_entry); +uvm_access_counter_clear_op_t +uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters); void 
uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters); @@ -732,6 +740,18 @@ NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_g void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push); void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push, const uvm_access_counter_buffer_entry_t *buffer_entry); +uvm_access_counter_clear_op_t +uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); +uvm_access_counter_clear_op_t +uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries); // The source and destination addresses must be 16-byte aligned. Note that the // best performance is achieved with 256-byte alignment. The decrypt size must @@ -785,6 +805,7 @@ struct uvm_host_hal_struct uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register; uvm_hal_access_counter_clear_all_t access_counter_clear_all; uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted; + uvm_hal_access_counter_query_clear_op_t access_counter_query_clear_op; uvm_hal_get_time_t get_time; }; diff --git a/kernel-open/nvidia-uvm/uvm_hal_types.h b/kernel-open/nvidia-uvm/uvm_hal_types.h index 0763e7115..f84289c14 100644 --- a/kernel-open/nvidia-uvm/uvm_hal_types.h +++ b/kernel-open/nvidia-uvm/uvm_hal_types.h @@ -471,6 +471,13 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2) return max(membar_1, membar_2); } +typedef enum +{ + UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0, + UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED, + UVM_ACCESS_COUNTER_CLEAR_OP_ALL +} uvm_access_counter_clear_op_t; + struct uvm_access_counter_buffer_entry_struct { // Address of the region for which a notification was sent diff --git a/kernel-open/nvidia-uvm/uvm_lock.c b/kernel-open/nvidia-uvm/uvm_lock.c index 1ee17044f..1fec9c7d6 100644 --- a/kernel-open/nvidia-uvm/uvm_lock.c +++ b/kernel-open/nvidia-uvm/uvm_lock.c @@ -27,7 +27,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order) { - BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37); + BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 38); switch (lock_order) { UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID); @@ -58,6 +58,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order) UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK); + UVM_ENUM_STRING_CASE(UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_WLC_CHANNEL); UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST); diff --git a/kernel-open/nvidia-uvm/uvm_lock.h b/kernel-open/nvidia-uvm/uvm_lock.h index 2371e0f8c..6286f50e7 100644 --- a/kernel-open/nvidia-uvm/uvm_lock.h +++ b/kernel-open/nvidia-uvm/uvm_lock.h @@ -432,6 +432,11 @@ // Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK // Exclusive bitlock (mutex) per each root chunk internal to PMM. // +// - Access counters clear operations +// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS +// +// It protects the parent_gpu's access counters clear tracker. 
+// // - Channel lock // Order: UVM_LOCK_ORDER_CHANNEL // Spinlock (uvm_spinlock_t) or exclusive lock (mutex) @@ -477,7 +482,7 @@ // // CE semaphore payloads are encrypted, and require to take the CSL lock // (UVM_LOCK_ORDER_LEAF) to decrypt the payload. - +// // - CSL Context // Order: UVM_LOCK_ORDER_CSL_CTX // When the Confidential Computing feature is enabled, encrypt/decrypt @@ -523,6 +528,7 @@ typedef enum UVM_LOCK_ORDER_PMM, UVM_LOCK_ORDER_PMM_PMA, UVM_LOCK_ORDER_PMM_ROOT_CHUNK, + UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS, UVM_LOCK_ORDER_CHANNEL, UVM_LOCK_ORDER_WLC_CHANNEL, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST, diff --git a/kernel-open/nvidia-uvm/uvm_maxwell_host.c b/kernel-open/nvidia-uvm/uvm_maxwell_host.c index 8b580fd4b..6caee098b 100644 --- a/kernel-open/nvidia-uvm/uvm_maxwell_host.c +++ b/kernel-open/nvidia-uvm/uvm_maxwell_host.c @@ -336,6 +336,15 @@ void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push, UVM_ASSERT_MSG(false, "host access_counter_clear_targeted called on Maxwell GPU\n"); } +uvm_access_counter_clear_op_t +uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + UVM_ASSERT_MSG(false, "host access_counter_query_clear_op called on Maxwell GPU\n"); + return UVM_ACCESS_COUNTER_CLEAR_OP_NONE; +} + NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu) { NvU32 time0; diff --git a/kernel-open/nvidia-uvm/uvm_turing_host.c b/kernel-open/nvidia-uvm/uvm_turing_host.c index 0d31e3b89..2363d36b5 100644 --- a/kernel-open/nvidia-uvm/uvm_turing_host.c +++ b/kernel-open/nvidia-uvm/uvm_turing_host.c @@ -1,5 +1,5 @@ /******************************************************************************* - Copyright (c) 2017-2024 NVIDIA Corporation + Copyright (c) 2017-2025 NVIDIA Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to @@ -382,3 +382,11 @@ void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push, HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) | HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank)); } + +uvm_access_counter_clear_op_t +uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu, + uvm_access_counter_buffer_entry_t **buffer_entries, + NvU32 num_entries) +{ + return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; +} diff --git a/kernel-open/nvidia/nvidia.Kbuild b/kernel-open/nvidia/nvidia.Kbuild index 6ae67c3f9..81b3cadbd 100644 --- a/kernel-open/nvidia/nvidia.Kbuild +++ b/kernel-open/nvidia/nvidia.Kbuild @@ -260,6 +260,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops NV_CONFTEST_TYPE_COMPILE_TESTS += class_create_has_no_owner_arg NV_CONFTEST_TYPE_COMPILE_TESTS += class_devnode_has_const_arg +NV_CONFTEST_TYPE_COMPILE_TESTS += has_enum_pidtype_tgid NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build diff --git a/kernel-open/nvidia/os-interface.c b/kernel-open/nvidia/os-interface.c index 9bd1e99d5..20e6e6826 100644 --- a/kernel-open/nvidia/os-interface.c +++ b/kernel-open/nvidia/os-interface.c @@ -2644,7 +2644,11 @@ NV_STATUS NV_API_CALL os_offline_page_at_address void* NV_API_CALL os_get_pid_info(void) { - return get_task_pid(current, PIDTYPE_PID); +#if defined(NV_HAS_ENUM_PIDTYPE_TGID) + return get_task_pid(current, PIDTYPE_TGID); +#else + return 
get_task_pid(current->group_leader, PIDTYPE_PID); +#endif } void NV_API_CALL os_put_pid_info(void *pid_info) diff --git a/src/common/displayport/src/dp_connectorimpl2x.cpp b/src/common/displayport/src/dp_connectorimpl2x.cpp index 9f74a6554..70c45e20e 100644 --- a/src/common/displayport/src/dp_connectorimpl2x.cpp +++ b/src/common/displayport/src/dp_connectorimpl2x.cpp @@ -490,12 +490,22 @@ bool ConnectorImpl2x::compoundQueryAttachMSTGeneric(Group * target, tail->bandwidth.compound_query_state.totalTimeSlots) { compoundQueryResult = false; + if(this->bEnableLowerBppCheckForDsc) + { + tail->bandwidth.compound_query_state.timeslots_used_by_query -= linkConfig->slotsForPBN(base_pbn); + tail->bandwidth.compound_query_state.bandwidthAllocatedForIndex &= ~(1 << compoundQueryCount); + } SET_DP_IMP_ERROR(pErrorCode, DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH) } } tail = (DeviceImpl*)tail->getParent(); } } + // If the compoundQueryResult is false, we need to reset the compoundQueryLocalLinkPBN + if (!compoundQueryResult && this->bEnableLowerBppCheckForDsc) + { + compoundQueryLocalLinkPBN -= slots_pbn; + } } else { diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index daafd3605..318bcf07f 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r570_00 + #define NV_BUILD_BRANCH r573_07 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r570_00 + #define NV_PUBLIC_BRANCH r573_07 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r570_00-407" -#define NV_BUILD_CHANGELIST_NUM (35817632) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r573_07-429" +#define NV_BUILD_CHANGELIST_NUM (36009859) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r570/r570_00-407" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35817632) +#define NV_BUILD_NAME "rel/gpu_drv/r570/r573_07-429" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36009859) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r570_00-404" -#define NV_BUILD_CHANGELIST_NUM (35811382) -#define NV_BUILD_TYPE "Nightly" -#define NV_BUILD_NAME "r570_00-250410" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35811869) +#define NV_BUILD_BRANCH_VERSION "r573_07-1" +#define NV_BUILD_CHANGELIST_NUM (35886817) +#define NV_BUILD_TYPE "Official" +#define NV_BUILD_NAME "573.08" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35886817) #define NV_BUILD_BRANCH_BASE_VERSION R570 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 278fe8c79..8fc54639e 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "570.144" +#define NV_VERSION_STRING "570.148.08" #else diff --git a/src/common/nvswitch/kernel/ls10/intr_ls10.c b/src/common/nvswitch/kernel/ls10/intr_ls10.c index d51919abf..bb4727b78 100644 --- a/src/common/nvswitch/kernel/ls10/intr_ls10.c +++ b/src/common/nvswitch/kernel/ls10/intr_ls10.c @@ -6349,7 +6349,8 @@ _nvswitch_deferred_link_state_check_ls10 lastLinkUpTime = 
chip_device->deferredLinkErrors[link].state.lastLinkUpTime; lastRetrainTime = chip_device->deferredLinkErrors[link].state.lastRetrainTime; // Sanity Check - NVSWITCH_ASSERT(nvswitch_is_link_valid(device, link)); + if (!nvswitch_is_link_valid(device, link)) + return; chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_FALSE; bRedeferLinkStateCheck = NV_FALSE; diff --git a/src/common/nvswitch/kernel/ls10/minion_ls10.c b/src/common/nvswitch/kernel/ls10/minion_ls10.c index 69cfe07f6..e40370682 100644 --- a/src/common/nvswitch/kernel/ls10/minion_ls10.c +++ b/src/common/nvswitch/kernel/ls10/minion_ls10.c @@ -941,7 +941,6 @@ cleanup: if (tempStatus != NVL_SUCCESS) { NVSWITCH_PRINT(device, ERROR, "Link %d Inband Buffer transfer for RX_BUFFER_CLEAR\n", linkId); - return; } if (device->link[linkId].inbandData.message != NULL) { diff --git a/src/common/sdk/nvidia/inc/nvos.h b/src/common/sdk/nvidia/inc/nvos.h index 39ce059de..dc532bda8 100644 --- a/src/common/sdk/nvidia/inc/nvos.h +++ b/src/common/sdk/nvidia/inc/nvos.h @@ -79,6 +79,7 @@ extern "C" { #define NVOS_STATUS_ERROR_ILLEGAL_ACTION NV_ERR_ILLEGAL_ACTION #define NVOS_STATUS_ERROR_IN_USE NV_ERR_STATE_IN_USE #define NVOS_STATUS_ERROR_INSUFFICIENT_RESOURCES NV_ERR_INSUFFICIENT_RESOURCES +#define NVOS_STATUS_ERROR_INSUFFICIENT_ZBC_ENTRY NV_ERR_INSUFFICIENT_ZBC_ENTRY #define NVOS_STATUS_ERROR_INVALID_ACCESS_TYPE NV_ERR_INVALID_ACCESS_TYPE #define NVOS_STATUS_ERROR_INVALID_ARGUMENT NV_ERR_INVALID_ARGUMENT #define NVOS_STATUS_ERROR_INVALID_BASE NV_ERR_INVALID_BASE diff --git a/src/nvidia/generated/g_chipset_nvoc.h b/src/nvidia/generated/g_chipset_nvoc.h index 8635511ef..96635df25 100644 --- a/src/nvidia/generated/g_chipset_nvoc.h +++ b/src/nvidia/generated/g_chipset_nvoc.h @@ -157,6 +157,9 @@ struct PCIECONFIGSPACEBASE #define CL_AER_ERROR_SOURCE (CL_AER_BEGIN + 0x34) #define CL_AER_END (CL_AER_BEGIN + 0x34) +// Advanced Error Reporting Root Error Status ERR_COR Subclass Capable Mask +#define CL_AER_ROOT_ERROR_STATUS_ERR_COR_SUBCLASS_MASK (NVBIT32(7) | NVBIT32(8)) + // PCI Express Device Capabilities 2 #define CL_PCIE_DEV_CAP_2_ATOMICS_SUPPORTED_BIT NVBIT(6) #define CL_PCIE_DEV_CAP_2_ATOMIC_32BIT NVBIT(7) diff --git a/src/nvidia/generated/g_gpu_access_nvoc.h b/src/nvidia/generated/g_gpu_access_nvoc.h index 3a6e29e0c..ebf08fe81 100644 --- a/src/nvidia/generated/g_gpu_access_nvoc.h +++ b/src/nvidia/generated/g_gpu_access_nvoc.h @@ -228,6 +228,14 @@ void regCheckAndLogReadFailure(RegisterAccess *, NvU32 addr, NvU32 mask, NvU32 v // Get the address of a register given the Aperture and offset. 
#define REG_GET_ADDR(ap, offset) ioaprtGetRegAddr(ap, offset) +// +// These UNCHECKED macros are provided for extenuating circumstances to avoid the 0xbadf +// sanity checking done by the usual register read utilities and must not be used generally +// +// +#define GPU_REG_RD08_UNCHECKED(g,a) osDevReadReg008(g, gpuGetDeviceMapping(g, DEVICE_INDEX_GPU, 0), a) +#define GPU_REG_RD32_UNCHECKED(g,a) osDevReadReg032(g, gpuGetDeviceMapping(g, DEVICE_INDEX_GPU, 0), a) + // GPU macros defined in terms of DEV_ macros #define GPU_REG_RD08(g,a) REG_INST_RD08(g,GPU,0,a) #define GPU_REG_RD16(g,a) REG_INST_RD16(g,GPU,0,a) diff --git a/src/nvidia/generated/g_kern_disp_nvoc.h b/src/nvidia/generated/g_kern_disp_nvoc.h index ade3e1e1d..bbb77271e 100644 --- a/src/nvidia/generated/g_kern_disp_nvoc.h +++ b/src/nvidia/generated/g_kern_disp_nvoc.h @@ -14,7 +14,7 @@ extern "C" { #endif /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -212,12 +212,13 @@ struct KernelDisplay { NvU32 (*__kdispServiceAwakenIntr__)(OBJGPU *, struct KernelDisplay * /*this*/, struct THREAD_STATE_NODE *); // halified (2 hals) body NV_STATUS (*__kdispComputeDpModeSettings__)(OBJGPU *, struct KernelDisplay * /*this*/, NvU32, DPMODESETDATA *, DPIMPINFO *); // halified (3 hals) body - // 6 PDB properties + // 7 PDB properties NvBool PDB_PROP_KDISP_IMP_ENABLE; NvBool PDB_PROP_KDISP_IMP_ALLOC_BW_IN_KERNEL_RM_DEF; NvBool PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE; NvBool PDB_PROP_KDISP_IN_AWAKEN_INTR; NvBool PDB_PROP_KDISP_HAS_SEPARATE_LOW_LATENCY_LINE; + NvBool PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED; // Data members struct DisplayInstanceMemory *pInst; @@ -302,6 +303,8 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_KernelDisplay; #define PDB_PROP_KDISP_IMP_ENABLE_BASE_NAME PDB_PROP_KDISP_IMP_ENABLE #define PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE_BASE_CAST #define PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE_BASE_NAME PDB_PROP_KDISP_FEATURE_STRETCH_VBLANK_CAPABLE +#define PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED_BASE_CAST +#define PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED_BASE_NAME PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED NV_STATUS __nvoc_objCreateDynamic_KernelDisplay(KernelDisplay**, Dynamic*, NvU32, va_list); diff --git a/src/nvidia/generated/g_nv_name_released.h b/src/nvidia/generated/g_nv_name_released.h index c7f5b6741..3ef649616 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ b/src/nvidia/generated/g_nv_name_released.h @@ -5430,6 +5430,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2C19, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080 Laptop GPU" }, { 0x2C58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 Laptop GPU" }, { 0x2C59, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080 Laptop GPU" }, + { 0x2D04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Ti" }, { 0x2F04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070" }, { 0x2F18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" }, { 0x2F58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" }, diff --git a/src/nvidia/interface/nvrm_registry.h b/src/nvidia/interface/nvrm_registry.h index b2e80c50d..9eb7e915e 100644 --- a/src/nvidia/interface/nvrm_registry.h +++ b/src/nvidia/interface/nvrm_registry.h @@ -64,7 +64,7 @@ // Type Dword // Change all RM internal timeouts to experiment with Bug 
5203024. // -// Some timeouts may still silently clamp to differnt min/max values and this +// Some timeouts may still silently clamp to different min/max values and this // regkey does NOT validate their range. // #define NV_REG_STR_RM_BUG5203024_OVERRIDE_TIMEOUT "RmOverrideInternalTimeoutsMs" @@ -84,6 +84,20 @@ #define NV_REG_STR_RM_BUG5203024_OVERRIDE_TIMEOUT_FLAGS_SET_FECS_WATCHDOG_TIMEOUT 26:26 +// +// This regkey is experimental +// +// Type Dword +// Change video Watchdog and GP timeouts to experiment with Bug 5203024. +// Stores the timeout value in ms. +// If this regkey is set and has non-zero value, also disables MB timeouts. +// +// Some timeouts may still silently clamp to different min/max values and this +// regkey does NOT validate their range. +// +#define NV_REG_STR_RM_BUG5203024_OVERRIDE_VIDEO_TIMEOUT "RmVideoEngineTimeoutMs" + + // // Type Dword // Override default RM timeout. Measured in milliseconds. @@ -1332,6 +1346,11 @@ #define NV_REG_STR_RM_INTR_LOCKING_MODE_DEFAULT (0x00000000) #define NV_REG_STR_RM_INTR_LOCKING_MODE_INTR_MASK (0x00000001) +#define NV_REG_INTERNAL_PANEL_DISCONNECTED "RMInternalPanelDisconnected" +#define NV_REG_INTERNAL_PANEL_DISCONNECTED_DISABLE 0x00000000 +#define NV_REG_INTERNAL_PANEL_DISCONNECTED_ENABLE 0x00000001 +#define NV_REG_INTERNAL_PANEL_DISCONNECTED_DEFAULT RM_REG_INTERNAL_PANEL_DISCONNECTED_DISABLE + #define NV_REG_STR_RM_PER_INTR_DPC_QUEUING "RMDisablePerIntrDPCQueueing" // Type DWORD // This regkey is used to disable per interrupt DPC queuing. @@ -1398,14 +1417,14 @@ #define NV_REG_STR_RM_RC_WATCHDOG_DEFAULT NV_REG_STR_RM_RC_WATCHDOG_ENABLE #define NV_REG_STR_RM_WATCHDOG_TIMEOUT "RmWatchDogTimeOut" -#define NV_REG_STR_RM_WATCHDOG_TIMEOUT_LOW 0x00000007 -#define NV_REG_STR_RM_WATCHDOG_TIMEOUT_HI 0x0000000C -#define NV_REG_STR_RM_WATCHDOG_TIMEOUT_DEFAULT NV_REG_STR_RM_WATCHDOG_TIMEOUT_LOW +#define NV_REG_STR_RM_WATCHDOG_TIMEOUT_LOW 5 +#define NV_REG_STR_RM_WATCHDOG_TIMEOUT_HI 60 +#define NV_REG_STR_RM_WATCHDOG_TIMEOUT_DEFAULT 7 -#define NV_REG_STR_RM_WATCHDOG_INTERVAL "RmWatchDogInterval" -#define NV_REG_STR_RM_WATCHDOG_INTERVAL_LOW 0x00000007 -#define NV_REG_STR_RM_WATCHDOG_INTERVAL_HI 0x0000000C -#define NV_REG_STR_RM_WATCHDOG_INTERVAL_DEFAULT NV_REG_STR_RM_WATCHDOG_INTERVAL_LOW +#define NV_REG_STR_RM_WATCHDOG_INTERVAL "RmWatchDogInterval" +#define NV_REG_STR_RM_WATCHDOG_INTERVAL_LOW 5 +#define NV_REG_STR_RM_WATCHDOG_INTERVAL_HI 30 +#define NV_REG_STR_RM_WATCHDOG_INTERVAL_DEFAULT 7 // Enable/Disable watchcat in GSP-Plugin for Guest RPC // Default is Enabled diff --git a/src/nvidia/kernel/inc/gpuvideo/rmifvideng.h b/src/nvidia/kernel/inc/gpuvideo/rmifvideng.h index 8296b1496..0be774785 100644 --- a/src/nvidia/kernel/inc/gpuvideo/rmifvideng.h +++ b/src/nvidia/kernel/inc/gpuvideo/rmifvideng.h @@ -85,8 +85,20 @@ typedef struct * You can reorder or change below this point but update version. * Make sure to align it to 16B as ucode expect 16byte alignment to DMA efficiently. */ - NvU32 videoPgPmuHandshake; // Handshake between PMU and Video Ucode for SW controlled IDLE signal. - NvU64 rsvd2; // reserved field + NvU32 videoPgPmuHandshake; // Handshake between PMU and Video Ucode for SW controlled IDLE signal. + + /* + * WAR to avoid triggering HS signing on older chips, as these bootargs are only used on RISCV anyways. + * Always define videngTimeoutMs for RM builds. Assumption is that ENG_TYPE will be undefined for RM builds + * Only define it in video ucode builds that enable GB10X_OVERRIDE_TIMEOUT in build cfgs. 
+ */ +#if (!defined(ENG_TYPE) || defined(GB10X_OVERRIDE_TIMEOUT)) + NvU32 videngTimeoutMs; // SW WAR for bug 5203864, increasing timeouts on GB10x via regkey RmOverrideInternalTimeoutsMs. + // Specify the timeout value in ms. If the value is 0, do not override the timeouts sent from KMD and/or UMD. + NvU32 rsvd2; // reserved field +#else + NvU64 rsvd2; // reserved field +#endif } NV_VIDENG_BOOT_PARAMS, *PNV_VIDENG_BOOT_PARAMS; #endif // RMIFVIDENG_H diff --git a/src/nvidia/src/kernel/compute/imex_session_api.c b/src/nvidia/src/kernel/compute/imex_session_api.c index 1eaac97db..091735401 100644 --- a/src/nvidia/src/kernel/compute/imex_session_api.c +++ b/src/nvidia/src/kernel/compute/imex_session_api.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -339,7 +339,7 @@ imexsessionapiDestruct_IMPL if (_checkDanglingExports(RES_GET_CLIENT(pImexSessionApi))) { fabricDisableMemAlloc(pFabric); - NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, disabled fabric allocations!\n"); + NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, disabled fabric allocations\n"); } // Invalidate export cache to block future imports on this node ID. @@ -361,7 +361,7 @@ imexsessionapiDestruct_IMPL { if (rcAndDisableOutstandingClientsWithImportedMemory(NULL, NV_FABRIC_INVALID_NODE_ID)) { - NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, robust channel recovery invoked!\n"); + NV_PRINTF(LEVEL_ERROR, "Abrupt nvidia-imex daemon shutdown detected, robust channel recovery invoked\n"); } } } @@ -426,7 +426,10 @@ imexsessionapiCtrlCmdDisableImporters_IMPL return NV_ERR_NOT_SUPPORTED; if (rcAndDisableOutstandingClientsWithImportedMemory(NULL, pParams->nodeId)) - NV_PRINTF(LEVEL_ERROR, "nvidia-imex daemon has invoked robust channel recovery!\n"); + { + NV_PRINTF(LEVEL_ERROR, "nvidia-imex daemon has invoked robust channel recovery for remote node: %u\n", + pParams->nodeId); + } return NV_OK; } diff --git a/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c b/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c index e9e7012e1..d9d235d85 100644 --- a/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c +++ b/src/nvidia/src/kernel/gpu/arch/hopper/kern_gpu_gh100.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -22,6 +22,7 @@ */ #include "gpu/gpu.h" +#include "gpu/gpu_access.h" #include "gpu/gpu_child_class_defs.h" #include "os/os.h" #include "nverror.h" @@ -101,7 +102,7 @@ gpuReadPassThruConfigReg_GH100 NvU32 *pData ) { - *pData = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_EP_PCFGM) + index); + *pData = GPU_REG_RD32_UNCHECKED(pGpu, DEVICE_BASE(NV_EP_PCFGM) + index); return NV_OK; } diff --git a/src/nvidia/src/kernel/gpu/disp/kern_disp.c b/src/nvidia/src/kernel/gpu/disp/kern_disp.c index b90fa2906..932e7b104 100644 --- a/src/nvidia/src/kernel/gpu/disp/kern_disp.c +++ b/src/nvidia/src/kernel/gpu/disp/kern_disp.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -102,6 +102,7 @@ kdispConstructEngine_IMPL(OBJGPU *pGpu, ENGDESCRIPTOR engDesc) { NV_STATUS status; + NvU32 data; // // NOTE: DO NOT call IpVersion _HAL functions in ConstructEngine. @@ -140,6 +141,20 @@ kdispConstructEngine_IMPL(OBJGPU *pGpu, pKernelDisplay->pLowLatencySpinLock = (PORT_SPINLOCK *) portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged()); NV_ASSERT_OR_RETURN((pKernelDisplay->pLowLatencySpinLock != NULL), NV_ERR_INSUFFICIENT_RESOURCES); + if ((osReadRegistryDword(pGpu, NV_REG_INTERNAL_PANEL_DISCONNECTED, &data) == NV_OK) + && (data == NV_REG_INTERNAL_PANEL_DISCONNECTED_ENABLE)) + { + pKernelDisplay->setProperty(pKernelDisplay, + PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED, + NV_TRUE); + } + else + { + pKernelDisplay->setProperty(pKernelDisplay, + PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED, + NV_FALSE); + } + return status; } @@ -342,14 +357,17 @@ kdispInitBrightcStateLoad_IMPL(OBJGPU *pGpu, portMemSet(pBrightcInfo, 0, sizeof(*pBrightcInfo)); pBrightcInfo->status = status; - if ((pKernelDisplay != NULL) && (pKernelDisplay->pStaticInfo->internalDispActiveMask != 0) && !bInternalSkuFuseEnabled) + if ((pKernelDisplay != NULL) + && (pKernelDisplay->pStaticInfo->internalDispActiveMask != 0) + && !(bInternalSkuFuseEnabled + || (pKernelDisplay->getProperty(pKernelDisplay, PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED)))) { // Fill in the Backlight Method Data. pBrightcInfo->backLightDataSize = sizeof(pBrightcInfo->backLightData); status = osCallACPI_DSM(pGpu, ACPI_DSM_FUNCTION_CURRENT, NV_ACPI_GENERIC_FUNC_GETBACKLIGHT, (NvU32 *)(pBrightcInfo->backLightData), &pBrightcInfo->backLightDataSize); - pBrightcInfo->status = status; + pBrightcInfo->status = status; } status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice, diff --git a/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c b/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c index 41927931f..9feef391a 100644 --- a/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c +++ b/src/nvidia/src/kernel/gpu/perf/kern_perf_ctrl.c @@ -111,6 +111,10 @@ subdeviceCtrlCmdPerfGetGpumonPerfmonUtilSamplesV2_KERNEL pParams, sizeof(*pParams))); + // Skip translation if the request from root namespace. + if (osIsInitNs()) + return NV_OK; + // Now translate the sample's procId. 
numEntries = pParams->bufSize / sizeof (NV2080_CTRL_PERF_GPUMON_PERFMON_UTIL_SAMPLE); NV_ASSERT_OR_RETURN(numEntries <= NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL, diff --git a/src/nvidia/src/kernel/gpu/rc/kernel_rc.c b/src/nvidia/src/kernel/gpu/rc/kernel_rc.c index c1ebf8e21..c620a51a5 100644 --- a/src/nvidia/src/kernel/gpu/rc/kernel_rc.c +++ b/src/nvidia/src/kernel/gpu/rc/kernel_rc.c @@ -470,8 +470,8 @@ krcCheckBusError_KERNEL &clDevCtrlStatus) == NV_OK && clDevCtrlStatusFlags != 0) { - NV_PRINTF(LEVEL_ERROR, - "PCI-E corelogic status has pending errors (CL_PCIE_DEV_CTRL_STATUS = %08X):\n", + NV_PRINTF(LEVEL_INFO, + "PCI-E corelogic: Pending errors in DEV_CTRL_STATUS = %08X\n", clDevCtrlStatus); clDevCtrlStatusFlags_Org = clDevCtrlStatusFlags; @@ -479,7 +479,7 @@ krcCheckBusError_KERNEL if (clDevCtrlStatusFlags & NV2080_CTRL_BUS_INFO_PCIE_LINK_ERRORS_CORR_ERROR) { - NV_PRINTF(LEVEL_ERROR, " _CORR_ERROR_DETECTED\n"); + NV_PRINTF(LEVEL_INFO, "PCI-E corelogic: CORR_ERROR_DETECTED\n"); // not much interested in this one clDevCtrlStatusFlags &= ~NV2080_CTRL_BUS_INFO_PCIE_LINK_ERRORS_CORR_ERROR; @@ -487,26 +487,27 @@ krcCheckBusError_KERNEL if (clDevCtrlStatusFlags & NV2080_CTRL_BUS_INFO_PCIE_LINK_ERRORS_NON_FATAL_ERROR) { - NV_PRINTF(LEVEL_ERROR, " _NON_FATAL_ERROR_DETECTED\n"); + NV_PRINTF(LEVEL_INFO, "PCI-E corelogic: NON_FATAL_ERROR_DETECTED\n"); } if (clDevCtrlStatusFlags & NV2080_CTRL_BUS_INFO_PCIE_LINK_ERRORS_FATAL_ERROR) { - NV_PRINTF(LEVEL_ERROR, " _FATAL_ERROR_DETECTED\n"); + NV_PRINTF(LEVEL_ERROR, "PCI-E corelogic: FATAL_ERROR_DETECTED\n"); } if (clDevCtrlStatusFlags & NV2080_CTRL_BUS_INFO_PCIE_LINK_ERRORS_UNSUPP_REQUEST) { - NV_PRINTF(LEVEL_ERROR, " _UNSUPP_REQUEST_DETECTED\n"); + NV_PRINTF(LEVEL_INFO, "PCI-E corelogic: UNSUPP_REQUEST_DETECTED\n"); } } // Corelogic AER if (pCl != NULL && clPcieReadAerCapability(pGpu, pCl, &clAer) == NV_OK && - (clAer.UncorrErrStatusReg != 0 || clAer.RooErrStatus != 0)) + (clAer.UncorrErrStatusReg != 0 || + (clAer.RooErrStatus & ~CL_AER_ROOT_ERROR_STATUS_ERR_COR_SUBCLASS_MASK) != 0)) { NV_PRINTF(LEVEL_ERROR, - "PCE-I Advanced Error Reporting Corelogic Info:\n"); + "PCI-E Advanced Error Reporting Corelogic Info:\n"); NV_PRINTF(LEVEL_ERROR, " Uncorr Error Status Register : %08X\n", clAer.UncorrErrStatusReg); diff --git a/src/nvidia/src/kernel/platform/acpi_common.c b/src/nvidia/src/kernel/platform/acpi_common.c index db6383151..6341418e8 100644 --- a/src/nvidia/src/kernel/platform/acpi_common.c +++ b/src/nvidia/src/kernel/platform/acpi_common.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2000-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2000-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -40,6 +40,9 @@ #include "mxm_spec.h" #include "gpu/gsp/gsp_static_config.h" #include "platform/nbsi/nbsi_read.h" +#include "nvrm_registry.h" + +#include "gpu/disp/kern_disp.h" // // DSM ACPI Routines common routines for Linux @@ -706,6 +709,17 @@ checkDsmCall NV_ASSERT_OR_RETURN(pInOut, NV_ERR_INVALID_ARGUMENT); NV_ASSERT_OR_RETURN(pSize, NV_ERR_INVALID_ARGUMENT); + KernelDisplay *pKernelDisplay = GPU_GET_KERNEL_DISPLAY(pGpu); + + if (pKernelDisplay != NULL + && pKernelDisplay->getProperty(pKernelDisplay, PDB_PROP_KDISP_INTERNAL_PANEL_DISCONNECTED)) + { + if (*pAcpiDsmFunction == ACPI_DSM_FUNCTION_NBCI) + { + return NV_ERR_NOT_SUPPORTED; + } + } + // Do any remapping of subfunction if function is current if (remapDsmFunctionAndSubFunction(pGpu, pAcpiDsmFunction, pAcpiDsmSubFunction) != NV_OK) { diff --git a/src/nvidia/src/kernel/vgpu/rpc.c b/src/nvidia/src/kernel/vgpu/rpc.c index 2042a7ce0..55dc65972 100644 --- a/src/nvidia/src/kernel/vgpu/rpc.c +++ b/src/nvidia/src/kernel/vgpu/rpc.c @@ -9698,6 +9698,8 @@ NV_STATUS rpcDumpProtobufComponent_v18_12 if (IS_GSP_CLIENT(pGpu)) { rpc_dump_protobuf_component_v18_12 *rpc_params = &rpc_message->dump_protobuf_component_v18_12; + const NvU32 fixed_param_size = sizeof(rpc_message_header_v) + sizeof(*rpc_params); + NV_ASSERT_OR_RETURN(fixed_param_size <= pRpc->maxRpcSize, NV_ERR_INVALID_STATE); status = rpcWriteCommonHeader(pGpu, pRpc, NV_VGPU_MSG_FUNCTION_DUMP_PROTOBUF_COMPONENT, sizeof(*rpc_params)); @@ -9709,7 +9711,7 @@ NV_STATUS rpcDumpProtobufComponent_v18_12 rpc_params->countOnly = ((pPrbEnc->flags & PRB_COUNT_ONLY) != 0); rpc_params->bugCheckCode = pNvDumpState->bugCheckCode; rpc_params->internalCode = pNvDumpState->internalCode; - rpc_params->bufferSize = NV_MIN(pRpc->maxRpcSize, prbEncBufLeft(pPrbEnc)); + rpc_params->bufferSize = NV_MIN(pRpc->maxRpcSize - fixed_param_size, prbEncBufLeft(pPrbEnc)); status = _issueRpcAndWait(pGpu, pRpc); diff --git a/version.mk b/version.mk index b746a69a9..cd4229d28 100644 --- a/version.mk +++ b/version.mk @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 570.144 +NVIDIA_VERSION = 570.148.08 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))
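
The bulk of the UVM changes above consolidate access-counter clear tracking: the per-notification-buffer clear_tracker is replaced by a single access_counters_clear_tracker on the parent GPU, protected by the new UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS mutex, and clears are routed through the new access_counter_query_clear_op HAL hook. Below is a minimal illustrative sketch (not part of the patch) of that add/wait pattern. It uses only the UVM calls shown in the hunks above; the clear_state_t struct and the helper names clear_state_add_push/clear_state_wait are assumptions made for illustration, and the code would only compile inside the nvidia-uvm module tree:

    #include "uvm_lock.h"
    #include "uvm_tracker.h"
    #include "uvm_push.h"

    // Hypothetical container for the fields the patch adds to uvm_parent_gpu_t.
    typedef struct
    {
        uvm_mutex_t   clear_tracker_lock;   // taken at UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS order
        uvm_tracker_t clear_tracker;        // aggregates every access-counter clear push for this GPU
    } clear_state_t;

    // Record a finished clear push in the shared tracker (illustrative helper,
    // mirroring what access_counter_clear_all/_notifications do in the patch).
    static NV_STATUS clear_state_add_push(clear_state_t *state, uvm_push_t *push)
    {
        NV_STATUS status;

        uvm_mutex_lock(&state->clear_tracker_lock);
        uvm_tracker_remove_completed(&state->clear_tracker);
        status = uvm_tracker_add_push_safe(&state->clear_tracker, push);
        uvm_mutex_unlock(&state->clear_tracker_lock);

        return status;
    }

    // Wait for all outstanding clear operations (illustrative helper,
    // mirroring parent_gpu_clear_tracker_wait in the patch).
    static NV_STATUS clear_state_wait(clear_state_t *state)
    {
        NV_STATUS status;

        uvm_mutex_lock(&state->clear_tracker_lock);
        status = uvm_tracker_wait(&state->clear_tracker);
        uvm_mutex_unlock(&state->clear_tracker_lock);

        return status;
    }

Because all clears funnel into one tracker, teardown paths such as sync_parent_gpu_trackers() only need this mutex instead of iterating the notification buffers and taking each one's ISR service lock.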