575.57.08

This commit is contained in:
Maneet Singh
2025-05-29 10:58:21 -07:00
parent e00332b05f
commit 30e15d79de
74 changed files with 798 additions and 417 deletions

View File

@@ -75,21 +75,14 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(src)/$(_module)/$(_module).Kbuild))
#
# Define CFLAGS that apply to all the NVIDIA kernel modules. EXTRA_CFLAGS
# is deprecated since 2.6.24 in favor of ccflags-y, but we need to support
# older kernels which do not have ccflags-y. Newer kernels append
# $(EXTRA_CFLAGS) to ccflags-y for compatibility.
#
EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"575.51.03\"
ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"575.57.08\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
ccflags-y += -I$(SYSSRCHOST1X)
endif
# Some Android kernels prohibit driver use of filesystem functions like
@@ -99,57 +92,57 @@ endif
PLATFORM_IS_ANDROID ?= 0
ifeq ($(PLATFORM_IS_ANDROID),1)
EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0
ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=0
else
EXTRA_CFLAGS += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1
ccflags-y += -DNV_FILESYSTEM_ACCESS_AVAILABLE=1
endif
EXTRA_CFLAGS += -Wno-unused-function
ccflags-y += -Wno-unused-function
ifneq ($(NV_BUILD_TYPE),debug)
EXTRA_CFLAGS += -Wuninitialized
ccflags-y += -Wuninitialized
endif
EXTRA_CFLAGS += -fno-strict-aliasing
ccflags-y += -fno-strict-aliasing
ifeq ($(ARCH),arm64)
EXTRA_CFLAGS += -mstrict-align
ccflags-y += -mstrict-align
endif
ifeq ($(NV_BUILD_TYPE),debug)
EXTRA_CFLAGS += -g
ccflags-y += -g
endif
EXTRA_CFLAGS += -ffreestanding
ccflags-y += -ffreestanding
ifeq ($(ARCH),arm64)
EXTRA_CFLAGS += -mgeneral-regs-only -march=armv8-a
EXTRA_CFLAGS += $(call cc-option,-mno-outline-atomics,)
ccflags-y += -mgeneral-regs-only -march=armv8-a
ccflags-y += $(call cc-option,-mno-outline-atomics,)
endif
ifeq ($(ARCH),x86_64)
EXTRA_CFLAGS += -mno-red-zone -mcmodel=kernel
ccflags-y += -mno-red-zone -mcmodel=kernel
endif
ifeq ($(ARCH),powerpc)
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
ccflags-y += -mlittle-endian -mno-strict-align
endif
EXTRA_CFLAGS += -DNV_UVM_ENABLE
EXTRA_CFLAGS += $(call cc-option,-Werror=undef,)
EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER
ccflags-y += -DNV_UVM_ENABLE
ccflags-y += $(call cc-option,-Werror=undef,)
ccflags-y += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
ccflags-y += -DNV_KERNEL_INTERFACE_LAYER
#
# Detect SGI UV systems and apply system-specific optimizations.
#
ifneq ($(wildcard /proc/sgi_uv),)
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
ccflags-y += -DNV_CONFIG_X86_UV
endif
ifdef VGX_FORCE_VFIO_PCI_CORE
EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
ccflags-y += -DNV_VGPU_FORCE_VFIO_PCI_CORE
endif
WARNINGS_AS_ERRORS ?=
@@ -183,7 +176,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(ccflags-y) -fno-pie
NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2017-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -63,4 +63,13 @@ static inline void nv_timer_setup(struct nv_timer *nv_timer,
#endif
}
/*
 * Delete a kernel timer through whichever sync-delete API the build has.
 *
 * del_timer_sync() is used on NV_BSD builds and whenever the
 * NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync check evaluates to zero;
 * every other configuration calls timer_delete_sync().
 */
static inline void nv_timer_delete_sync(struct timer_list *timer)
{
#if defined(NV_BSD) || !NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
    del_timer_sync(timer);
#else
    timer_delete_sync(timer);
#endif
}
#endif // __NV_TIMER_H__

View File

@@ -961,7 +961,6 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);

View File

@@ -3149,6 +3149,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
;;
has_enum_pidtype_tgid)
# Determine if PIDTYPE_TGID is present in the kernel as an enum
#
# Added by commit 6883f81aac6f ("pid: Implement PIDTYPE_TGID")
# in v4.19
#
CODE="
#include <linux/pid.h>
enum pid_type type = PIDTYPE_TGID;
"
compile_check_conftest "$CODE" "NV_HAS_ENUM_PIDTYPE_TGID" "" "types"
;;
vfio_pin_pages_has_vfio_device_arg)
#
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@@ -7579,6 +7594,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
;;
page_pgmap)
#
# Determine if the page_pgmap() function is present.
#
# Added by commit 82ba975e4c43 ("mm: allow compound zone device
# pages") in v6.14
#
CODE="
#include <linux/mmzone.h>
int conftest_page_pgmap(void) {
return page_pgmap(NULL);
}"
compile_check_conftest "$CODE" "NV_PAGE_PGMAP_PRESENT" "" "functions"
;;
folio_test_swapcache)
#
# Determine if the folio_test_swapcache() function is present.

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1132,7 +1132,7 @@ static void __nv_drm_semsurf_fence_ctx_destroy(
*/
nv_drm_workthread_shutdown(&ctx->worker);
nv_drm_del_timer_sync(&ctx->timer);
nv_timer_delete_sync(&ctx->timer.kernel_timer);
/*
* The semaphore surface could still be sending callbacks, so it is still

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -236,15 +236,6 @@ unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
return jiffies + msecs_to_jiffies(relative_timeout_ms);
}
bool nv_drm_del_timer_sync(nv_drm_timer *timer)
{
if (del_timer_sync(&timer->kernel_timer)) {
return true;
} else {
return false;
}
}
#if defined(NV_DRM_FENCE_AVAILABLE)
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -101,8 +101,6 @@ void nv_drm_timer_setup(nv_drm_timer *timer,
void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long relative_timeout_ms);
bool nv_drm_del_timer_sync(nv_drm_timer *timer);
unsigned long nv_drm_timer_now(void);
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms);

View File

@@ -37,6 +37,7 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -53,6 +53,7 @@
#include "nv-procfs.h"
#include "nv-kthread-q.h"
#include "nv-time.h"
#include "nv-timer.h"
#include "nv-lock.h"
#include "nv-chardev-numbers.h"
@@ -748,7 +749,7 @@ static void nvkms_kthread_q_callback(void *arg)
* pending timers and then waiting for workqueue callbacks.
*/
if (timer->kernel_timer_created) {
del_timer_sync(&timer->kernel_timer);
nv_timer_delete_sync(&timer->kernel_timer);
}
/*
@@ -1932,7 +1933,11 @@ restart:
* completion, and we wait for queue completion with
* nv_kthread_q_stop below.
*/
#if !defined(NV_BSD) && NV_IS_EXPORT_SYMBOL_PRESENT_timer_delete_sync
if (timer_delete_sync(&timer->kernel_timer) == 1) {
#else
if (del_timer_sync(&timer->kernel_timer) == 1) {
#endif
/* We've deactivated timer so we need to clean after it */
list_del(&timer->timers_list);

View File

@@ -103,4 +103,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg

View File

@@ -62,6 +62,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += page_pgmap
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
@@ -80,3 +81,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___iowrite64_lo_hi
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_make_device_exclusive

View File

@@ -139,7 +139,11 @@ static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
// Write the 64-bit value val to the VCMDQ register located at byte offset reg
// from the VCMDQ base address derived from smmu_cmdqv_base.
static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
// Use __iowrite64_lo_hi() when the export-symbol check found it; otherwise
// fall back to iowrite64().
#if NV_IS_EXPORT_SYMBOL_PRESENT___iowrite64_lo_hi
__iowrite64_lo_hi(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#else
iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
#endif
}
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU

View File

@@ -254,3 +254,31 @@ void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
}
// Select the access counter clear operation for a batch of notifications.
//
// When RM reports more than one access counter buffer, a batch containing any
// entry whose tag has its least significant bit set is cleared with a
// clear-all operation. Every other case uses targeted clears.
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
                                                      uvm_access_counter_buffer_entry_t **buffer_entries,
                                                      NvU32 num_entries)
{
    NvU32 entry_index;

    // A single notification buffer never needs the clear-all path.
    if (parent_gpu->rm_info.accessCntrBufferCount <= 1)
        return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;

    for (entry_index = 0; entry_index < num_entries; entry_index++) {
        // The LSb identifies the die ID.
        if (buffer_entries[entry_index]->tag & 0x1)
            return UVM_ACCESS_COUNTER_CLEAR_OP_ALL;
    }

    return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}
// Select the access counter clear operation for a batch of notifications.
// This variant ignores its arguments and always requests targeted clears.
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries)
{
return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
}

View File

@@ -1194,6 +1194,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters_clear_tracker);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1211,6 +1213,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
return NV_OK;
cleanup:
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_kvfree(parent_gpu);
return status;
@@ -1656,19 +1659,12 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
if (access_counters->rm_info.accessCntrBufferHandle != 0) {
uvm_access_counters_isr_lock(access_counters);
status = uvm_tracker_wait(&access_counters->clear_tracker);
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}
@@ -1801,6 +1797,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_kvfree(parent_gpu);
}
@@ -2895,6 +2893,10 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
if (gpu_info->accessCntrBufferCount > 1)
gpu_info->accessCntrBufferCount = 1;
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based

View File

@@ -522,10 +522,6 @@ struct uvm_access_counter_buffer_struct
// PCIe
NvU32 cached_put;
// Tracker used to aggregate access counters clear operations, needed for
// GPU removal
uvm_tracker_t clear_tracker;
// Current access counter configuration. During normal operation this
// information is computed once during GPU initialization. However, tests
// may override it to try different configuration values.
@@ -1213,6 +1209,11 @@ struct uvm_parent_gpu_struct
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
// Tracker used to aggregate access counters clear operations, needed for
// GPU removal. It is only used when access_counters_supported is set.
uvm_tracker_t access_counters_clear_tracker;
uvm_mutex_t access_counters_clear_tracker_lock;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;

View File

@@ -217,38 +217,19 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
return NV_OK;
}
// Clear the access counter notifications and add it to the per-GPU
// per-notification-buffer clear tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_notifications)
static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu)
{
NvU32 i;
NV_STATUS status;
uvm_push_t push;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
uvm_push_end(&push);
uvm_tracker_remove_completed(&access_counters->clear_tracker);
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
return status;
}
// Clear all access counters and add the operation to the per-GPU
// per-notification-buffer clear tracker
// Clear all access counters and add the operation to the per-GPU clear
// tracker.
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buffer_t *access_counters)
{
NV_STATUS status;
@@ -270,8 +251,52 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf
uvm_push_end(&push);
uvm_tracker_remove_completed(&access_counters->clear_tracker);
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);
return status;
}
// Clear the access counters for a batch of notifications and record the
// resulting push in the per-GPU clear tracker.
//
// The host HAL is asked which kind of clear the batch needs. A clear-all
// request is delegated to access_counter_clear_all(); otherwise one targeted
// clear is pushed per notification.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
                                                    uvm_access_counter_buffer_t *access_counters,
                                                    uvm_access_counter_buffer_entry_t **notification_start,
                                                    NvU32 num_notifications)
{
    uvm_parent_gpu_t *parent_gpu = gpu->parent;
    uvm_access_counter_clear_op_t clear_op;
    uvm_push_t push;
    NV_STATUS status;
    NvU32 notif_index;

    clear_op = parent_gpu->host_hal->access_counter_query_clear_op(parent_gpu, notification_start, num_notifications);
    if (clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_ALL)
        return access_counter_clear_all(gpu, access_counters);

    UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
    if (status != NV_OK) {
        UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu),
                      access_counters->index);
        return status;
    }

    for (notif_index = 0; notif_index < num_notifications; notif_index++)
        parent_gpu->host_hal->access_counter_clear_targeted(&push, notification_start[notif_index]);

    uvm_push_end(&push);

    // Drop completed entries and add the new push while holding the tracker
    // lock.
    uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
    uvm_tracker_remove_completed(&parent_gpu->access_counters_clear_tracker);
    status = uvm_tracker_add_push_safe(&parent_gpu->access_counters_clear_tracker, &push);
    uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);

    return status;
}
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
@@ -374,8 +399,6 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3
access_counters->notifications_ignored_count = 0;
access_counters->test.reconfiguration_owner = NULL;
uvm_tracker_init(&access_counters->clear_tracker);
access_counters->max_notifications = access_counters->rm_info.bufferSize /
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu);
@@ -443,8 +466,6 @@ void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 n
UVM_ASSERT(status == NV_OK);
access_counters->rm_info.accessCntrBufferHandle = 0;
uvm_tracker_deinit(&access_counters->clear_tracker);
uvm_kvfree(batch_context->notification_cache);
uvm_kvfree(batch_context->notifications);
batch_context->notification_cache = NULL;
@@ -488,7 +509,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, NvU32 index, con
if (status != NV_OK)
goto error;
status = uvm_tracker_wait(&access_counters->clear_tracker);
status = parent_gpu_clear_tracker_wait(gpu->parent);
if (status != NV_OK)
goto error;
@@ -522,7 +543,7 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[index].service_lock));
// Wait for any pending clear operation before releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
status = parent_gpu_clear_tracker_wait(parent_gpu);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
@@ -1751,28 +1772,21 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS
uvm_va_space_up_read(va_space);
for_each_gpu_in_mask(gpu, retained_gpus) {
NvU32 notif_buf_index;
uvm_access_counter_buffer_t *access_counters;
if (!gpu->parent->access_counters_supported)
continue;
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
// clear_all affects all the notification buffers, we issue it for
// the notif_buf_index 0.
access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, 0);
status = access_counter_clear_all(gpu, access_counters);
if (status == NV_OK)
status = parent_gpu_clear_tracker_wait(gpu->parent);
// Access counters are not enabled. Nothing to clear.
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count) {
status = access_counter_clear_all(gpu, access_counters);
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
}
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
break;
}
// Break the loop if clear_all failed in any of the retained gpus.
if (status != NV_OK)
break;
}
for_each_gpu_in_mask(gpu, retained_gpus)
@@ -2055,7 +2069,9 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = NULL;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_access_counter_buffer_t *access_counters;
NvU32 notif_buf_index;
NvBool index0_state;
if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
return NV_ERR_INVALID_ARGUMENT;
@@ -2069,51 +2085,52 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}
for (notif_buf_index = 0;
notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount && status == NV_OK;
notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_access_counters_isr_lock(access_counters);
// Access counters not enabled. Nothing to reset
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
goto exit_release_gpu;
}
// Access counters not enabled. Nothing to reset
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0)
goto exit_isr_unlock;
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
status = access_counter_clear_all(gpu, access_counters);
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;
// Clear operations affect all notification buffers, so we issue them
// through notif_buf_index 0.
notif_buf_index = 0;
access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, notif_buf_index);
entry.bank = params->bank;
entry.tag = params->tag;
uvm_access_counters_isr_lock(access_counters);
status = access_counter_clear_notifications(gpu, access_counters, &notification, 1);
}
// Recheck access counters are enabled.
index0_state = gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0;
if (index0_state) {
NvU32 i;
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++)
UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state);
goto exit_isr_unlock;
}
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
status = access_counter_clear_all(gpu, access_counters);
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;
entry.bank = params->bank;
entry.tag = params->tag;
status = access_counter_clear_notifications(gpu, access_counters, &notification, 1);
}
if (status == NV_OK)
status = parent_gpu_clear_tracker_wait(gpu->parent);
exit_isr_unlock:
uvm_access_counters_isr_unlock(access_counters);
// We only need to clear_all() once.
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
NvU32 i;
// Early exit of the main loop; since we only need to clear_all()
// once. Check that all the remaining notification buffers have
// access counters in same state.
NvBool index0_state = (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0);
for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++)
UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state);
break;
}
}
uvm_access_counters_isr_unlock(access_counters);
exit_release_gpu:
uvm_gpu_release(gpu);

View File

@@ -218,6 +218,7 @@ static uvm_hal_class_ops_t host_table[] =
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
},
@@ -269,6 +270,7 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
.access_counter_query_clear_op = uvm_hal_turing_access_counter_query_clear_op,
}
},
{
@@ -308,12 +310,15 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
}
},
{
.id = BLACKWELL_CHANNEL_GPFIFO_B,
.parent_id = BLACKWELL_CHANNEL_GPFIFO_A,
.u.host_ops = {}
.u.host_ops = {
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb20x
}
},
};

View File

@@ -704,6 +704,10 @@ typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *pa
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
typedef uvm_access_counter_clear_op_t
(*uvm_hal_access_counter_query_clear_op_t)(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
@@ -720,6 +724,10 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
uvm_access_counter_clear_op_t
uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
@@ -733,6 +741,18 @@ NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_g
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
uvm_access_counter_clear_op_t
uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb100(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
uvm_access_counter_clear_op_t
uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t **buffer_entries,
NvU32 num_entries);
// The source and destination addresses must be 16-byte aligned. Note that the
// best performance is achieved with 256-byte alignment. The decrypt size must
@@ -786,6 +806,7 @@ struct uvm_host_hal_struct
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_access_counter_query_clear_op_t access_counter_query_clear_op;
uvm_hal_get_time_t get_time;
};

View File

@@ -471,6 +471,13 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
return max(membar_1, membar_2);
}
// Kind of access counter clear operation, as returned by the
// access_counter_query_clear_op host HAL for a batch of notifications.
typedef enum
{
UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0,
// Clear only the counters identified by the given notifications.
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED,
// Clear all access counters.
UVM_ACCESS_COUNTER_CLEAR_OP_ALL
} uvm_access_counter_clear_op_t;
struct uvm_access_counter_buffer_entry_struct
{
// Address of the region for which a notification was sent

View File

@@ -1992,7 +1992,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,
dpage = pfn_to_page(pfn);
UVM_ASSERT(is_device_private_page(dpage));
UVM_ASSERT(dpage->pgmap->owner == &g_uvm_global);
UVM_ASSERT(page_pgmap(dpage)->owner == &g_uvm_global);
hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk);
UVM_ASSERT(!page_count(dpage));
@@ -2438,6 +2438,39 @@ static void hmm_release_atomic_pages(uvm_va_block_t *va_block,
}
}
// Wrapper that hides the difference between the kernel's
// make_device_exclusive() and make_device_exclusive_range() interfaces.
//
// When make_device_exclusive() is exported, each PAGE_SIZE step in
// [start, end) is converted individually and the resulting page is stored in
// pages[]. If any conversion fails, every page obtained so far is unlocked and
// released, and the error code of the failing call is returned. Otherwise the
// number of pages stored in pages[] is returned.
//
// On kernels without make_device_exclusive(), the request is forwarded to
// make_device_exclusive_range() and its return value is passed through.
static int hmm_make_device_exclusive_range(struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end,
                                           struct page **pages)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT_make_device_exclusive
    unsigned long addr;
    int npages = 0;

    for (addr = start; addr < end; addr += PAGE_SIZE) {
        struct folio *folio;
        struct page *page;

        page = make_device_exclusive(mm, addr, &g_uvm_global, &folio);
        if (IS_ERR(page)) {
            // Decode the error before unwinding. The cleanup loop must not
            // reuse 'page', otherwise PTR_ERR() would be applied to a valid
            // page pointer instead of the ERR_PTR from the failing call.
            int error = PTR_ERR(page);

            while (npages) {
                struct page *prev_page = pages[--npages];

                unlock_page(prev_page);
                put_page(prev_page);
            }

            return error;
        }

        pages[npages++] = page;
    }

    return npages;
#else
    return make_device_exclusive_range(mm, start, end, pages, &g_uvm_global);
#endif
}
static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
@@ -2491,11 +2524,10 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
uvm_mutex_unlock(&va_block->lock);
npages = make_device_exclusive_range(service_context->block_context->mm,
npages = hmm_make_device_exclusive_range(service_context->block_context->mm,
uvm_va_block_cpu_page_address(va_block, region.first),
uvm_va_block_cpu_page_address(va_block, region.outer - 1) + PAGE_SIZE,
pages + region.first,
&g_uvm_global);
pages + region.first);
uvm_mutex_lock(&va_block->lock);

View File

@@ -427,4 +427,8 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
#define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 1
#endif
#ifndef NV_PAGE_PGMAP_PRESENT
#define page_pgmap(page) (page)->pgmap
#endif
#endif // _UVM_LINUX_H

View File

@@ -27,7 +27,7 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 38);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@@ -58,6 +58,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_WLC_CHANNEL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
@@ -352,15 +353,7 @@ bool __uvm_thread_check_all_unlocked(void)
NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order)
{
// TODO: Bug 1772140: Notably bit locks currently do not work on memory
// allocated through vmalloc() (including big allocations created with
// uvm_kvmalloc()). The problem is the bit_waitqueue() helper used by the
// kernel internally that uses virt_to_page().
// To prevent us from using kmalloc() for a huge allocation, warn if the
// allocation size gets bigger than what we are comfortable with for
// kmalloc() in uvm_kvmalloc().
size_t size = sizeof(unsigned long) * BITS_TO_LONGS(count);
WARN_ON_ONCE(size > UVM_KMALLOC_THRESHOLD);
bit_locks->bits = kzalloc(size, NV_UVM_GFP_FLAGS);
if (!bit_locks->bits)

View File

@@ -432,6 +432,11 @@
// Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
// Exclusive bitlock (mutex) per each root chunk internal to PMM.
//
// - Access counters clear operations
// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
// It protects the parent_gpu's access counters clear tracker.
//
// - Channel lock
// Order: UVM_LOCK_ORDER_CHANNEL
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
@@ -477,7 +482,7 @@
//
// CE semaphore payloads are encrypted, and require to take the CSL lock
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
//
// - CSL Context
// Order: UVM_LOCK_ORDER_CSL_CTX
// When the Confidential Computing feature is enabled, encrypt/decrypt
@@ -523,6 +528,7 @@ typedef enum
UVM_LOCK_ORDER_PMM,
UVM_LOCK_ORDER_PMM_PMA,
UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
UVM_LOCK_ORDER_CHANNEL,
UVM_LOCK_ORDER_WLC_CHANNEL,
UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,

View File

@@ -336,6 +336,15 @@ void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
UVM_ASSERT_MSG(false, "host access_counter_clear_targeted called on Maxwell GPU\n");
}
// Maxwell placeholder for the host HAL access_counter_query_clear_op hook.
// It does not consult any of its arguments: it raises an assertion failure
// reporting that the hook was called on a Maxwell GPU and then returns
// UVM_ACCESS_COUNTER_CLEAR_OP_NONE.
uvm_access_counter_clear_op_t
uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                          uvm_access_counter_buffer_entry_t **buffer_entries,
                                                          NvU32 num_entries)
{
    const uvm_access_counter_clear_op_t clear_op = UVM_ACCESS_COUNTER_CLEAR_OP_NONE;

    UVM_ASSERT_MSG(false, "host access_counter_query_clear_op called on Maxwell GPU\n");

    return clear_op;
}
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu)
{
NvU32 time0;

View File

@@ -419,8 +419,9 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
// The passed-in subchunk is not the root chunk so the root chunk has to be
// split.
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
"chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
chunk->suballoc->pinned_leaf_chunks++;
}
@@ -448,8 +449,9 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_
// The passed-in subchunk is not the root chunk so the root chunk has to be
// split.
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
"chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
UVM_ASSERT(chunk->suballoc->pinned_leaf_chunks != 0);
chunk->suballoc->pinned_leaf_chunks--;
@@ -774,8 +776,10 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(chunk->suballoc->allocated == 0);
}
else {
UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), "%u != %u\n",
chunk->suballoc->allocated, num_subchunks(chunk));
UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk),
"%u != %u\n",
chunk->suballoc->allocated,
num_subchunks(chunk));
}
return true;
@@ -1263,11 +1267,13 @@ static NV_STATUS find_and_retain_va_block_to_evict(uvm_pmm_gpu_t *pmm, uvm_gpu_c
uvm_spin_lock(&pmm->list_lock);
// All free chunks should have been pinned already by pin_free_chunks_func().
// All free chunks should have been pinned already by
// pin_free_chunks_func().
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||
chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED ||
chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
"state %s\n", uvm_pmm_gpu_chunk_state_string(chunk->state));
"state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
UVM_ASSERT(chunk->va_block);
@@ -1754,8 +1760,10 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm,
UVM_ASSERT(chunk->parent->suballoc);
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk->parent) == uvm_chunk_find_next_size(chunk_sizes, cur_size));
UVM_ASSERT(chunk->parent->type == type);
UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), "allocated %u num %u\n",
chunk->parent->suballoc->allocated, num_subchunks(chunk->parent));
UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent),
"allocated %u num %u\n",
chunk->parent->suballoc->allocated,
num_subchunks(chunk->parent));
}
if (cur_size == chunk_size) {
@@ -2373,8 +2381,8 @@ static void free_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
try_free = is_root;
}
else {
// Freeing a chunk can only fail if it requires merging. Take the PMM lock
// and free it with merges supported.
// Freeing a chunk can only fail if it requires merging. Take the PMM
// lock and free it with merges supported.
uvm_mutex_lock(&pmm->lock);
free_chunk_with_merges(pmm, chunk);
uvm_mutex_unlock(&pmm->lock);
@@ -3333,7 +3341,7 @@ void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
// TODO: Bug 4672502: [Linux Upstream][UVM] Allow drivers to manage and
// allocate PCI P2PDMA pages directly
p2p_page = pfn_to_page(pci_start_pfn);
p2p_page->pgmap->ops = &uvm_device_p2p_pgmap_ops;
page_pgmap(p2p_page)->ops = &uvm_device_p2p_pgmap_ops;
for (; page_to_pfn(p2p_page) < pci_end_pfn; p2p_page++)
p2p_page->zone_device_data = NULL;
@@ -3348,7 +3356,7 @@ void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu)
if (gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(gpu->parent)) {
p2p_page = pfn_to_page(pci_start_pfn);
devm_memunmap_pages(&gpu->parent->pci_dev->dev, p2p_page->pgmap);
devm_memunmap_pages(&gpu->parent->pci_dev->dev, page_pgmap(p2p_page));
}
gpu->device_p2p_initialised = false;
@@ -3437,6 +3445,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
for (i = 0; i < UVM_PMM_GPU_MEMORY_TYPE_COUNT; i++) {
pmm->chunk_sizes[i] = 0;
// Add the common root chunk size to all memory types
pmm->chunk_sizes[i] |= UVM_CHUNK_SIZE_MAX;
for (j = 0; j < ARRAY_SIZE(chunk_size_init); j++)
@@ -3444,7 +3453,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
UVM_ASSERT(pmm->chunk_sizes[i] < UVM_CHUNK_SIZE_INVALID);
UVM_ASSERT_MSG(hweight_long(pmm->chunk_sizes[i]) <= UVM_MAX_CHUNK_SIZES,
"chunk sizes %lu, max chunk sizes %u\n", hweight_long(pmm->chunk_sizes[i]), UVM_MAX_CHUNK_SIZES);
"chunk sizes %lu, max chunk sizes %u\n",
hweight_long(pmm->chunk_sizes[i]),
UVM_MAX_CHUNK_SIZES);
}
status = init_caches(pmm);
@@ -3452,9 +3463,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
goto cleanup;
// Assert that max physical address of the GPU is not unreasonably big for
// creating the flat array of root chunks. 256GB should provide a reasonable
// amount of future-proofing and results in 128K chunks which is still
// manageable.
// creating the flat array of root chunks. UVM_GPU_MAX_PHYS_MEM should
// provide a reasonable amount of future-proofing and results in 512K chunks
// which is still manageable.
UVM_ASSERT_MSG(gpu->mem_info.max_allocatable_address < UVM_GPU_MAX_PHYS_MEM,
"Max physical address 0x%llx exceeds limit of 0x%llx\n",
gpu->mem_info.max_allocatable_address,

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -382,3 +382,11 @@ void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) |
HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
// Turing implementation of the access_counter_query_clear_op hook.
// The answer does not depend on parent_gpu, buffer_entries or num_entries:
// this function always returns UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED.
uvm_access_counter_clear_op_t
uvm_hal_turing_access_counter_query_clear_op(uvm_parent_gpu_t *parent_gpu,
                                             uvm_access_counter_buffer_entry_t **buffer_entries,
                                             NvU32 num_entries)
{
    const uvm_access_counter_clear_op_t clear_op = UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;

    return clear_op;
}

View File

@@ -133,6 +133,7 @@ NV_STATUS NV_API_CALL nv_acpi_get_powersource(NvU32 *ac_plugged)
return NV_OK;
}
#define ACPI_POWER_SOURCE_BUS_CHANGE_EVENT 0x00
#define ACPI_POWER_SOURCE_CHANGE_EVENT 0x80
static void nv_acpi_powersource_hotplug_event(acpi_handle handle, u32 event_type, void *data)
{
@@ -143,7 +144,7 @@ static void nv_acpi_powersource_hotplug_event(acpi_handle handle, u32 event_type
nv_acpi_t *pNvAcpiObject = data;
u32 ac_plugged = 0;
if (event_type == ACPI_POWER_SOURCE_CHANGE_EVENT)
if (event_type == ACPI_POWER_SOURCE_CHANGE_EVENT || event_type == ACPI_POWER_SOURCE_BUS_CHANGE_EVENT)
{
if (nv_acpi_get_powersource(&ac_plugged) != NV_OK)
return;
@@ -201,8 +202,16 @@ static nv_acpi_t* nv_install_notifier(
pNvAcpiObject->sp = sp;
pNvAcpiObject->notifier_data = notifier_data;
status = acpi_install_notify_handler(handle, ACPI_DEVICE_NOTIFY,
handler, pNvAcpiObject);
if (handle == psr_device_handle)
{
status = acpi_install_notify_handler(handle, ACPI_ALL_NOTIFY,
handler, pNvAcpiObject);
}
else
{
status = acpi_install_notify_handler(handle, ACPI_DEVICE_NOTIFY,
handler, pNvAcpiObject);
}
if (!ACPI_FAILURE(status))
{
pNvAcpiObject->notify_handler_installed = 1;
@@ -225,7 +234,14 @@ static void nv_uninstall_notifier(nv_acpi_t *pNvAcpiObject, acpi_notify_handler
if (pNvAcpiObject && pNvAcpiObject->notify_handler_installed)
{
status = acpi_remove_notify_handler(pNvAcpiObject->handle, ACPI_DEVICE_NOTIFY, handler);
if (pNvAcpiObject->handle == psr_device_handle)
{
status = acpi_remove_notify_handler(pNvAcpiObject->handle, ACPI_ALL_NOTIFY, handler);
}
else
{
status = acpi_remove_notify_handler(pNvAcpiObject->handle, ACPI_DEVICE_NOTIFY, handler);
}
if (ACPI_FAILURE(status))
{
nv_printf(NV_DBG_INFO,

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -150,8 +150,13 @@ void NV_API_CALL nv_create_nano_timer(
nv_nstimer->nv_nano_timer_callback = nvidia_nano_timer_callback;
#if NV_NANO_TIMER_USE_HRTIMER
#if NV_IS_EXPORT_SYMBOL_PRESENT_hrtimer_setup
hrtimer_setup(&nv_nstimer->hr_timer, &nv_nano_timer_callback_typed_data,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
#else
hrtimer_init(&nv_nstimer->hr_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
nv_nstimer->hr_timer.function = nv_nano_timer_callback_typed_data;
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_hrtimer_setup
#else
#if defined(NV_TIMER_SETUP_PRESENT)
timer_setup(&nv_nstimer->jiffy_timer, nv_jiffy_timer_callback_typed_data, 0);
@@ -203,7 +208,7 @@ void NV_API_CALL nv_cancel_nano_timer(
#if NV_NANO_TIMER_USE_HRTIMER
hrtimer_cancel(&nv_nstimer->hr_timer);
#else
del_timer_sync(&nv_nstimer->jiffy_timer);
nv_timer_delete_sync(&nv_nstimer->jiffy_timer);
#endif
}

View File

@@ -614,19 +614,6 @@ nv_pci_probe
}
#endif /* NV_PCI_SRIOV_SUPPORT */
if (!rm_wait_for_bar_firewall(
sp,
NV_PCI_DOMAIN_NUMBER(pci_dev),
NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev),
PCI_FUNC(pci_dev->devfn),
pci_dev->device))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: failed to wait for bar firewall to lower\n");
goto failed;
}
if (!rm_is_supported_pci_device(
(pci_dev->class >> 16) & 0xFF,
(pci_dev->class >> 8) & 0xFF,

View File

@@ -4189,7 +4189,7 @@ int NV_API_CALL nv_stop_rc_timer(
nv_printf(NV_DBG_INFO, "NVRM: stopping rc timer\n");
nv->rc_timer_enabled = 0;
del_timer_sync(&nvl->rc_timer.kernel_timer);
nv_timer_delete_sync(&nvl->rc_timer.kernel_timer);
nv_printf(NV_DBG_INFO, "NVRM: rc timer stopped\n");
return 0;
@@ -4233,7 +4233,7 @@ void NV_API_CALL nv_stop_snapshot_timer(void)
NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
if (timer_active)
del_timer_sync(&nvl->snapshot_timer.kernel_timer);
nv_timer_delete_sync(&nvl->snapshot_timer.kernel_timer);
}
void NV_API_CALL nv_flush_snapshot_timer(void)

View File

@@ -238,6 +238,8 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_set_memory_encrypted
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_set_memory_decrypted
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl___platform_driver_register
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___platform_driver_register
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_hrtimer_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += swiotlb_dma_ops
@@ -266,6 +268,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += of_property_for_each_u32_has_internal_args
NV_CONFTEST_TYPE_COMPILE_TESTS += platform_driver_struct_remove_returns_void
NV_CONFTEST_TYPE_COMPILE_TESTS += class_create_has_no_owner_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += class_devnode_has_const_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += has_enum_pidtype_tgid
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@@ -2672,7 +2672,11 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
// Returns the result of get_task_pid() for the calling task's thread group,
// as an opaque pointer.
//
// When the kernel provides PIDTYPE_TGID, the lookup is done on 'current' with
// that type. Otherwise the lookup is done on current->group_leader with
// PIDTYPE_PID.
//
// NOTE(review): the returned object is presumably released through
// os_put_pid_info() - confirm against callers.
void* NV_API_CALL os_get_pid_info(void)
{
    // No unconditional return may precede this conditional: it would make
    // both thread-group lookups below unreachable.
#if defined(NV_HAS_ENUM_PIDTYPE_TGID)
    return get_task_pid(current, PIDTYPE_TGID);
#else
    return get_task_pid(current->group_leader, PIDTYPE_PID);
#endif
}
void NV_API_CALL os_put_pid_info(void *pid_info)