570.123.07

russellcnv
2025-03-25 12:40:01 -07:00
parent 5e6ad2b575
commit 4d941c0b6e
146 changed files with 53927 additions and 54744 deletions

View File

@@ -59,7 +59,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c

View File

@@ -240,7 +240,7 @@ static void uvm_release_deferred(void *data)
// Since this function is only scheduled to run when uvm_release() fails
// to trylock-acquire the pm.lock, the following acquisition attempt
// is expected to block this thread, and cause it to remain blocked until
// uvm_resume() releases the lock. As a result, the deferred release
// kthread queue may stall for long periods of time.
uvm_down_read(&g_uvm_global.pm.lock);
@@ -292,14 +292,14 @@ static int uvm_release(struct inode *inode, struct file *filp)
// Because the kernel discards the status code returned from this release
// callback, early exit in case of a pm.lock acquisition failure is not
// an option. Instead, the teardown work normally performed synchronously
// needs to be scheduled to run after uvm_resume() releases the lock.
if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
uvm_va_space_destroy(va_space);
uvm_up_read(&g_uvm_global.pm.lock);
}
else {
// Remove references to this inode from the address_space. This isn't
// strictly necessary, as any CPU mappings of this file have already
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in
@@ -867,8 +867,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
}
// If the PM lock cannot be acquired, disable the VMA and report success
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// safely handle MAP_FIXED, which needs to complete atomically to prevent
// the loss of the virtual address range.
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
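The uvm_release() and uvm_mmap() hunks above rely on the same rule: those entry points must never sleep on pm.lock (the kernel discards uvm_release()'s status, and MAP_FIXED must complete atomically), so on contention the teardown is deferred to a kthread queue, or the VMA is disabled and the result reported later through an ioctl(). A minimal userspace sketch of the trylock-or-defer half, using POSIX rwlocks and a thread in place of the driver's uvm_down_read_trylock()/nv_kthread_q machinery (all names here are illustrative, not the driver's API):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t pm_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Deferred worker: blocks on the read side until "resume" drops the write
 * lock, then performs the teardown that release() could not do inline. */
static void *deferred_release(void *arg)
{
    (void)arg;
    pthread_rwlock_rdlock(&pm_lock);    /* may stay blocked for a long time */
    printf("deferred teardown running\n");
    pthread_rwlock_unlock(&pm_lock);
    return NULL;
}

/* release() analogue: must not block, so it only try-locks. */
static void release_cb(pthread_t *worker, int *deferred)
{
    if (pthread_rwlock_tryrdlock(&pm_lock) == 0) {
        printf("immediate teardown\n");
        pthread_rwlock_unlock(&pm_lock);
    }
    else {
        /* "suspend" holds the lock: queue the work instead of blocking. */
        *deferred = !pthread_create(worker, NULL, deferred_release, NULL);
    }
}

int main(void)
{
    pthread_t worker;
    int deferred = 0;

    pthread_rwlock_wrlock(&pm_lock);    /* "uvm_suspend" takes pm.lock */
    release_cb(&worker, &deferred);     /* cannot get it, so it defers */
    sleep(1);
    pthread_rwlock_unlock(&pm_lock);    /* "uvm_resume" unblocks the worker */

    if (deferred)
        pthread_join(worker, NULL);
    return 0;
}

The cost of the pattern, noted in the first hunk, is that the deferred worker can stall its queue for as long as the suspend lasts.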
@@ -1233,19 +1233,8 @@ static int uvm_init(void)
goto error;
}
pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));
if (uvm_enable_builtin_tests)
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
// Until that time, however, we need to avoid advertising UVM's ability to
// enable HMM functionality.
if (uvm_hmm_is_enabled_system_wide())
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");
return 0;
@@ -1274,8 +1263,6 @@ static void uvm_exit(void)
uvm_global_exit();
uvm_test_unload_state_exit();
pr_info("Unloaded the UVM driver.\n");
}
static void __exit uvm_exit_entry(void)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ada covers 128 TB and that's the minimum size
@@ -82,8 +80,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
@@ -86,8 +84,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Copyright (c) 2024-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -139,9 +139,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate;
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);

View File

@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Blackwell covers 64 PB and that's the minimum
@@ -85,8 +83,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
uvm_channel_manager_t *manager = pool->manager;
uvm_gpu_t *gpu = manager->gpu;
uvm_gpu_id_t id;
if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
return true;
for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return false;
}
@@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
{
uvm_channel_pool_t *pool;
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
@@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
// Use bitmap to track which were suspended.
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
status = channel_pool_suspend_p2p(pool);
@@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
{
uvm_channel_pool_t *pool;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
@@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
// Use bitmap to track which were suspended.
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
channel_pool_resume_p2p(pool);
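The "Use bitmap to track which were suspended" comment is doing real work here: several peer GPUs can map to the same GPU-to-GPU copy-engine pool, so the bitmap (test_bit/test_and_set_bit in the driver) is what keeps each pool from being suspended or resumed more than once. A standalone sketch of that dedup idea, with a plain bool array standing in for DECLARE_BITMAP and integer indices standing in for uvm_channel_pool_t (all names hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define MAX_POOLS 8
#define NUM_PEERS 4

/* Several peers may share a pool; suspend each pool at most once. */
static void suspend_p2p_pools(const int pool_for_peer[NUM_PEERS])
{
    bool suspended[MAX_POOLS] = { false };   /* stands in for the bitmap */
    int peer;

    for (peer = 0; peer < NUM_PEERS; peer++) {
        int pool = pool_for_peer[peer];

        if (pool < 0 || suspended[pool])     /* no pool, or already handled */
            continue;

        suspended[pool] = true;
        printf("suspending pool %d (first seen via peer %d)\n", pool, peer);
    }
}

int main(void)
{
    /* Peers 0 and 2 share pool 1; peer 3 has no P2P pool. */
    const int pool_for_peer[NUM_PEERS] = { 1, 5, 1, -1 };

    suspend_p2p_pools(pool_for_peer);
    return 0;
}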
@@ -3243,9 +3255,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
}
// 2- Allocation locations
@@ -3285,9 +3297,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
if (!is_string_valid_location(pushbuffer_loc_value)) {
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
}
// Override the default value if requested by the user
@@ -3297,8 +3309,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// so force the location to sys for now.
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
if (NVCPU_IS_AARCH64) {
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
}
else {
@@ -3310,8 +3322,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// Only support the knobs for GPFIFO/GPPut on Volta+
if (!gpu->parent->gpfifo_in_vidmem_supported) {
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
"instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
}
manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
@@ -3323,17 +3336,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
gpfifo_loc_value = uvm_channel_gpfifo_loc;
if (!is_string_valid_location(gpfifo_loc_value)) {
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
}
gpput_loc_value = uvm_channel_gpput_loc;
if (!is_string_valid_location(gpput_loc_value)) {
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
}
// On coherent platforms where the GPU does not cache sysmem but the CPU

View File

@@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \
@@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
current->pid, \
##__VA_ARGS__)
// Short prefix - typically for information.
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
func(prefix fmt, ##__VA_ARGS__)
// No prefix - used by kernel panic messages.
#define UVM_PRINT_FUNC(func, fmt, ...) \
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
// Check whether UVM_{ERR,DBG,INFO}_PRINT* should be enabled
// Check whether UVM_{ERR,DBG}_PRINT* should be enabled.
bool uvm_debug_prints_enabled(void);
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
} \
} while (0)
#define UVM_ASSERT_PRINT(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
// UVM_INFO_PRINT prints in all modes (including in the release mode.) It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
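The hunk above splits the helpers into a long prefix (source file, line, function and pid, used by the error/debug/assert prints) and a short prefix (just the driver tag, now used by UVM_INFO_PRINT). A small standalone illustration of the two prefix styles and how the output differs (simplified macros, not the driver's exact definitions, which also rate-limit and use kbasename()/current->pid):

#include <stdio.h>

#define TAG "nvidia-uvm: "

/* Long prefix: file:line and function, as UVM_PRINT_FUNC_PREFIX does. */
#define ERR_PRINT(fmt, ...) \
    printf(TAG "%s:%d %s() " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)

/* Short prefix: tag only, as UVM_PRINT_FUNC_SHORT_PREFIX / UVM_INFO_PRINT do. */
#define INFO_PRINT(fmt, ...) \
    printf(TAG fmt, ##__VA_ARGS__)

int main(void)
{
    INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");
    ERR_PRINT("unexpected status %d\n", -22);
    return 0;
}

The first line comes out as "nvidia-uvm: Built-in UVM tests ...", the second adds the file/line/function decoration, which is the same split the commit makes between informational messages and error or assert messages.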
@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
// Prevent function calls in expr and the print argument list from being
@@ -151,7 +160,8 @@ void on_uvm_assert(void);
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
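As the comment above says, the release-build assert is deliberately more guarded than UVM_ASSERT: it is gated by a module parameter, rate-limits its print, and only dumps the stack or latches a global error when separately requested. A trimmed-down standalone model of that gating, with plain C flags standing in for the uvm_release_asserts* module parameters and fprintf standing in for printk_ratelimited (not the driver's actual macro):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the uvm_release_asserts* module parameters. */
static bool release_asserts            = true;
static bool release_asserts_dump_stack = false;
static bool release_asserts_set_error  = false;

static bool global_fatal_error;

#define ASSERT_RELEASE(expr)                                                  \
    do {                                                                      \
        if (release_asserts && !(expr)) {                                     \
            fprintf(stderr, "Assert failed, condition %s not true\n", #expr); \
            if (release_asserts_set_error)                                    \
                global_fatal_error = true;  /* uvm_global_set_fatal_error */  \
            if (release_asserts_dump_stack)                                   \
                fprintf(stderr, "(stack dump would go here)\n");              \
        }                                                                     \
    } while (0)

int main(void)
{
    int refcount = -1;

    ASSERT_RELEASE(refcount >= 0);   /* prints, but does not panic */
    return global_fatal_error ? 1 : 0;
}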

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// There is no dedicated lock for the CSL context associated with replayable
// faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// See comment in uvm_conf_computing_fault_decrypt
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -93,11 +93,11 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;

View File

@@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
goto error;
}
status = uvm_access_counters_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
goto error;
}
// This sets up the ISR (interrupt service routine), by hooking into RM's
// top-half ISR callback. As soon as this call completes, GPU interrupts
// will start arriving, so it's important to be prepared to receive
@@ -224,8 +230,8 @@ void uvm_global_exit(void)
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_access_counters_exit();
uvm_service_block_context_exit();
uvm_perf_heuristics_exit();
uvm_perf_events_exit();
uvm_migrate_exit();
@@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
// * Flush relevant kthread queues (bottom half, etc.)
// Some locks acquired by this function, such as pm.lock, are released
// by uvm_resume(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
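The lock-tracking comments above describe an intentional asymmetry: pm.lock is taken in uvm_suspend() and only dropped in uvm_resume(), which a per-context lock tracker would normally flag as a leaked lock, hence the disable/enable calls around it. A toy model of why (hypothetical tracker, not the driver's uvm_thread_context code):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy lock tracker: counts locks held by the current "thread context" and
 * expects the count to return to zero before the context goes away. */
static int  locks_held;
static bool tracking_enabled = true;

static void track_acquire(void) { if (tracking_enabled) locks_held++; }
static void track_release(void) { if (tracking_enabled) locks_held--; }

static void context_check(void)
{
    /* A lock handed from suspend() to resume() would trip this check,
     * which is why tracking is disabled around both sides. */
    assert(locks_held == 0);
}

static void suspend(void)
{
    tracking_enabled = false;   /* uvm_thread_context_lock_disable_tracking() */
    track_acquire();            /* take "pm.lock"; held until resume() */
    tracking_enabled = true;
    context_check();            /* passes only because the acquire was untracked */
}

static void resume(void)
{
    tracking_enabled = false;
    track_release();            /* release the lock taken by suspend() */
    tracking_enabled = true;
    context_check();
}

int main(void)
{
    suspend();
    printf("suspended\n");
    resume();
    printf("resumed\n");
    return 0;
}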
@@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
// ensure any outstanding replayable faults are dismissed. The RM
// guarantees that all user channels have been preempted before
// uvm_suspend() is called, which implies that no user channels can be
// stalled on faults when this point is reached.
@@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
}
// Acquire each VA space's lock in write mode to lock out VMA open and
// release callbacks. These entry points do not have feasible early exit
// options, and so aren't suitable for synchronization with pm.lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
g_uvm_global.pm.is_suspended = false;
// Some locks released by this function, such as pm.lock, were acquired
// by uvm_suspend(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
@@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
uvm_thread_context_lock_enable_tracking();
// Force completion of any release callbacks successfully queued for
// deferred completion while suspended. The deferred release
// queue is not guaranteed to remain empty following this flush since
// some threads that failed to acquire pm.lock in uvm_release() may
// not have scheduled their handlers yet.
@@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
}
else {
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
nvstatusToString(error),
nvstatusToString(previous_error));
}
nvUvmInterfaceReportFatalError(error);

View File

@@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU64 mapped_cpu_pages_size;
NvU32 get, put;
NvU32 get;
NvU32 put;
NvU32 i;
unsigned int cpu;
UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
@@ -608,19 +610,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.replayable.max_faults);
gpu->parent->fault_buffer.replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
gpu->parent->fault_buffer_info.replayable.cached_get);
gpu->parent->fault_buffer.replayable.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
gpu->parent->fault_buffer_info.replayable.cached_put);
gpu->parent->fault_buffer.replayable.cached_put);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
gpu->parent->fault_buffer_hal->read_get(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
gpu->parent->fault_buffer_hal->read_put(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
gpu->parent->fault_buffer_info.max_batch_size);
gpu->parent->fault_buffer.max_batch_size);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_replayable_faults);
}
@@ -634,32 +636,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.non_replayable.max_faults);
gpu->parent->fault_buffer.non_replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_non_replayable_faults);
}
if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
gpu->parent->isr.access_counters.stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
gpu->parent->isr.access_counters[i].stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer[i].max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer[i].cached_get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer[i].cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
}
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer_info.max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer_info.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer_info.cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
}
num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
@@ -694,18 +699,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
parent_gpu->fault_buffer.replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
parent_gpu->fault_buffer.replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -713,25 +718,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays);
parent_gpu->fault_buffer.replayable.stats.num_replays);
UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
parent_gpu->stats.num_non_replayable_faults -
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -743,16 +748,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
{
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU32 i;
UVM_ASSERT(uvm_procfs_is_debug_enabled());
num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
// procfs_files are created before gpu_init_isr, so we need to check whether
// the access_counter_buffer is allocated.
if (parent_gpu->access_counter_buffer) {
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
}
}
}
// This function converts an index of 2D array of size [N x N] into an index
@@ -892,7 +906,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
gpu_info_print_common(gpu, s);
@@ -911,7 +925,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
gpu_fault_stats_print_common(parent_gpu, s);
@@ -930,7 +944,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
gpu_access_counters_print_common(parent_gpu, s);
@@ -1182,7 +1196,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1221,7 +1235,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)
// Initialize enough of the gpu struct for remove_gpu to be called
gpu->magic = UVM_GPU_MAGIC_VALUE;
uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);
sub_processor_index = uvm_id_sub_processor_index(gpu_id);
parent_gpu->gpus[sub_processor_index] = gpu;
@@ -1545,12 +1559,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
if (status != NV_OK) {
UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
uvm_pmm_gpu_device_p2p_init(gpu);
status = init_semaphore_pools(gpu);
@@ -1616,7 +1624,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// trackers.
if (sync_replay_tracker) {
uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@@ -1627,7 +1635,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// VA block trackers, too.
if (sync_clear_faulted_tracker) {
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@@ -1635,13 +1643,20 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
if (access_counters->rm_info.accessCntrBufferHandle != 0) {
uvm_access_counters_isr_lock(access_counters);
status = uvm_tracker_wait(&access_counters->clear_tracker);
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}
}
}
@@ -1680,15 +1695,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));
// Access counters should have been disabled when the GPU is no longer
// registered in any VA space.
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
deinit_parent_procfs_files(parent_gpu);
// Return ownership to RM
uvm_parent_gpu_deinit_isr(parent_gpu);
deinit_parent_procfs_files(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
@@ -1746,8 +1757,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
uvm_pmm_gpu_device_p2p_deinit(gpu);
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
uvm_pmm_gpu_deinit(&gpu->pmm);
if (gpu->rm_address_space != 0)
@@ -1794,14 +1803,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
break;
default:
UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
@@ -1809,7 +1818,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
}
if (!fault_entry->is_virtual)
++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;
++parent_gpu->stats.num_non_replayable_faults;
@@ -1821,23 +1830,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_PREFETCH:
++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
break;
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
break;
default:
break;
}
if (is_duplicate || fault_entry->filtered)
++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;
++parent_gpu->stats.num_replayable_faults;
}
@@ -1901,21 +1910,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
if (gpu_dst) {
atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
if (is_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
else if (is_access_counter)
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
if (is_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
else if (is_access_counter)
atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
}
}
}
@@ -1929,8 +1946,9 @@ static void uvm_param_conf(void)
}
else {
if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy,
UVM_PARAM_PEER_COPY_PHYSICAL);
}
g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
@@ -2397,6 +2415,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
{
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(peer_caps->ref_count == 0);
status = parent_peers_retain(gpu0->parent, gpu1->parent);
@@ -2419,25 +2438,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
// In the case of NVLINK peers, this initialization will happen during
// add_gpu. As soon as the peer info table is assigned below, the access
// counter bottom half could start operating on the GPU being newly
// added and inspecting the peer caps, so all of the appropriate
// initialization must happen before this point.
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
return NV_OK;
@@ -2465,18 +2472,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
// Flush the access counter buffer to avoid getting stale notifications for
// accesses to GPUs to which peer access is being disabled. This is also
@@ -2690,7 +2697,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);
// If the parent is being freed, stop scheduling new bottom halves and
// update relevant software state. Else flush any pending bottom halves
// before continuing.
if (free_parent)
uvm_parent_gpu_disable_isr(parent_gpu);
@@ -2713,6 +2720,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
const UvmGpuPlatformInfo *gpu_platform_info,
uvm_parent_gpu_t *parent_gpu,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
@@ -2725,6 +2733,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
if (status != NV_OK)
return status;
if (uvm_enable_builtin_tests)
parent_gpu->test = *parent_gpu_error;
}
gpu = alloc_gpu(parent_gpu, gpu_id);
@@ -2794,7 +2805,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
// Clear the interrupt bit and force the re-evaluation of the interrupt
// condition to ensure that we don't miss any pending interrupt
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// Access counters are enabled on demand
@@ -2837,6 +2848,7 @@ error:
// the partition.
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status = NV_OK;
@@ -2888,7 +2900,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
if (status != NV_OK)
goto error_unregister;
}
@@ -2913,11 +2925,12 @@ error_free_gpu_info:
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
uvm_mutex_lock(&g_uvm_global.global_lock);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
@@ -3072,60 +3085,63 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return (address.address >= gpu->parent->peer_va_base &&
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
}
} else {
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (uvm_aperture_is_peer(address.aperture)) {
bool is_peer = true;
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;
// EGM uses peer IDs but they are different from VIDMEM peer IDs.
// Check if the address aperture is an EGM aperture.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
uvm_aperture_t egm_peer_aperture;
if (!parent_peer_gpu->egm.enabled)
continue;
egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);
if (address.aperture == egm_peer_aperture) {
is_peer = false;
break;
}
// EGM uses peer IDs but they are different from VIDMEM peer
// IDs.
// Check if the address aperture is an EGM aperture.
// We should not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
if (address.aperture != UVM_APERTURE_SYS)
return false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
}
return false;
@@ -3141,49 +3157,6 @@ uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
return UVM_APERTURE_DEFAULT;
}
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
{
uvm_processor_id_t id = UVM_ID_INVALID;
// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
// is also reported for accesses to remote GPUs connected to a different CPU
// NUMA domain. We will need to determine the actual processor id using the
// reported physical address.
if (addr.aperture == UVM_APERTURE_SYS)
return UVM_ID_CPU;
else if (addr.aperture == UVM_APERTURE_VID)
return gpu->id;
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
// NVSWITCH connected systems use an extended physical address to
// map to peers. Find the physical memory 'slot' containing the
// given physical address to find the peer gpu that owns the
// physical address
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
other_gpu->mem_info.max_allocatable_address;
if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
fabric_window_end >= addr.address)
break;
}
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
return id;
}
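Both the SYS-aperture branch above and the uvm_gpu_get_processor_id_by_address() helper removed here reduce "does this address belong to a peer?" to a containment test against per-GPU address windows: exposed coherent memory windows for system-bus addresses, and NVSWITCH fabric memory windows for extended physical addresses. A standalone sketch of that lookup with a hypothetical window table (not the driver's parent-GPU list or field names):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t window_start;   /* e.g. memory_window_start or fabric window base */
    uint64_t window_end;     /* inclusive, matching the driver's comparison    */
} gpu_window_t;

/* Return the index of the GPU whose window contains phys_addr,
 * or -1 if the address is ordinary sysmem. */
static int find_owning_gpu(const gpu_window_t *gpus, int count, uint64_t phys_addr)
{
    int i;

    for (i = 0; i < count; i++) {
        if (phys_addr >= gpus[i].window_start && phys_addr <= gpus[i].window_end)
            return i;
    }
    return -1;
}

int main(void)
{
    const gpu_window_t gpus[] = {
        { 0x100000000000ull, 0x1000ffffffffull },
        { 0x200000000000ull, 0x2000ffffffffull },
    };

    printf("owner of 0x200000001000: %d\n",
           find_owning_gpu(gpus, 2, 0x200000001000ull));
    printf("owner of 0x42: %d\n",
           find_owning_gpu(gpus, 2, 0x42ull));
    return 0;
}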
static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
{
NvU64 key;
@@ -3570,20 +3543,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
if (!user_channel) {
status = NV_ERR_INVALID_CHANNEL;
goto exit_unlock;
}
if (!user_channel->in_subctx) {
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
UVM_ASSERT_MSG(entry->ve_id == 0,
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
entry->ve_id);
gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
@@ -3591,7 +3563,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_gpu = gpu_va_space->gpu;
}
else {
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;

View File

@@ -189,6 +189,9 @@ struct uvm_service_block_context_struct
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
// Access counters notification buffer index.
NvU32 access_counters_buffer_index;
};
typedef struct
@@ -197,8 +200,8 @@ typedef struct
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
// region of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@@ -350,7 +353,7 @@ typedef struct
// entries from the GPU buffer
NvU32 max_batch_size;
struct uvm_replayable_fault_buffer_info_struct
struct uvm_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -414,7 +417,7 @@ typedef struct
uvm_ats_fault_invalidate_t ats_invalidate;
} replayable;
struct uvm_non_replayable_fault_buffer_info_struct
struct uvm_non_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -468,7 +471,7 @@ typedef struct
// Timestamp when prefetch faults were disabled last time
NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
} uvm_fault_buffer_t;
struct uvm_access_counter_service_batch_context_struct
{
@@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_cached_notifications;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_access_counter_buffer_entry_t **notifications;
NvU32 num_notifications;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
} virt;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_reverse_map_t *translations;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Helper page mask to compute the accessed pages within a VA block
uvm_page_mask_t accessed_pages;
@@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 batch_id;
};
typedef struct
struct uvm_access_counter_buffer_struct
{
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
} rm;
uvm_parent_gpu_t *parent_gpu;
// The following values are precomputed by the access counter notification
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
// uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 translations_per_counter;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;
typedef struct
{
UvmGpuAccessCntrInfo rm_info;
// Access counters may have multiple notification buffers.
NvU32 index;
NvU32 max_notifications;
NvU32 max_batch_size;
@@ -560,10 +531,22 @@ typedef struct
// may override it to try different configuration values.
struct
{
uvm_gpu_access_counter_type_config_t mimc;
uvm_gpu_access_counter_type_config_t momc;
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
} rm;
NvU32 threshold;
// The following values are precomputed by the access counter
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
// in uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
NvU32 threshold;
} current_config;
// Access counter statistics
@@ -575,7 +558,7 @@ typedef struct
} stats;
// Ignoring access counters means that notifications are left in the HW
// buffer without being serviced. Requests to ignore access counters
// buffer without being serviced. Requests to ignore access counters
// are counted since the suspend path inhibits access counter interrupts,
// and the resume path needs to know whether to reenable them.
NvU32 notifications_ignored_count;
@@ -583,13 +566,25 @@ typedef struct
// Context structure used to service a GPU access counter batch
uvm_access_counter_service_batch_context_t batch_service_context;
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes
//
// Locking: both readers and writers must hold the access counters ISR lock
uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
struct
{
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes.
//
// Locking: both readers and writers must hold the access counters ISR
// lock.
uvm_va_space_t *reconfiguration_owner;
// When set, the access counters service loop breaks after processing the
// first batch. It is retriggered if notifications are still pending, but
// releasing the ISR service lock between batches makes it possible to
// exercise races that would otherwise be difficult to hit.
NvU32 sleep_per_iteration_us;
} test;
};
typedef struct
{
@@ -745,15 +740,11 @@ struct uvm_gpu_struct
struct
{
// Mask of peer_gpus set
// Mask of peer_gpus set.
uvm_processor_mask_t peer_gpu_mask;
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
// Leaf spinlock used to synchronize access to peer_gpu_mask.
uvm_spinlock_t peer_gpu_lock;
} peer_info;
// Maximum number of subcontexts supported
@@ -828,14 +819,6 @@ struct uvm_gpu_struct
uvm_bit_locks_t bitlocks;
} sysmem_mappings;
// Reverse lookup table used to query the user mapping associated with a
// sysmem (DMA) physical address.
//
// The system memory mapping information referred to by this field is
// different from that of sysmem_mappings, because it relates to user
// mappings (instead of kernel), and it is used in most configurations.
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
struct
{
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@@ -957,6 +940,16 @@ struct uvm_gpu_struct
uvm_mutex_t device_p2p_lock;
};
typedef struct
{
bool access_counters_alloc_buffer;
bool access_counters_alloc_block_context;
bool isr_access_counters_alloc;
bool isr_access_counters_alloc_stats_cpu;
bool access_counters_batch_context_notifications;
bool access_counters_batch_context_notification_cache;
} uvm_test_parent_gpu_inject_error_t;
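// The flags above are consulted on the corresponding allocation paths during
// access counter setup; when built-in tests are enabled and a flag is set,
// the path fails as if the allocation itself had returned no memory. A
// hedged sketch of that pattern follows (the helper is hypothetical, not
// driver code):
static NV_STATUS example_alloc_with_error_injection(uvm_parent_gpu_t *parent_gpu, size_t size, void **out)
{
    if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
        return NV_ERR_NO_MEMORY;

    *out = uvm_kvmalloc_zero(size);
    if (!*out)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}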
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
@@ -965,8 +958,8 @@ struct uvm_gpu_struct
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// registered, but there are brief periods when they are not registered,
// such as during interrupt handling, and in add_gpu() or remove_gpu().
nv_kref_t gpu_kref;
@@ -976,7 +969,7 @@ struct uvm_parent_gpu_struct
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
@@ -1079,11 +1072,6 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@@ -1205,17 +1193,17 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1348,6 +1336,8 @@ struct uvm_parent_gpu_struct
// GPUs.
NvU64 base_address;
} egm;
uvm_test_parent_gpu_inject_error_t test;
};
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
@@ -1395,10 +1385,10 @@ typedef struct
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
// GPU must be looked up through the original GPU's peer_gpu_mask.
// The fields will not change while the lock is held, but they may no
// longer be valid because the other GPU might be in teardown.
// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1510,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock. This is a narrower-purpose
// that the caller is holding the global_lock. This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1521,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out);
// Retain a gpu which is known to already be retained. Does NOT require the
@@ -1578,10 +1569,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
@@ -1655,7 +1642,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
@@ -1684,8 +1672,8 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
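// A sketch of the two-step check suggested above, assuming the caller can
// safely call into RM on the slow path. Only NV_WARN_MORE_PROCESSING_REQUIRED
// from the RM-free variant requires the full uvm_gpu_check_nvlink_error()
// call; any other status is already conclusive.
static NV_STATUS example_check_nvlink_error(uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_gpu_check_nvlink_error_no_rm(gpu);

    if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
        status = uvm_gpu_check_nvlink_error(gpu);

    return status;
}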
// Map size bytes of contiguous sysmem on the GPU for physical access

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,11 +27,11 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All of the parent_gpu's notification buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
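// A minimal sketch of the suspend/resume pairing implied by the ignore
// semantics above; the surrounding power management code is assumed, not
// shown. Ignoring leaves notifications buffered in HW, and unignoring
// re-evaluates interrupt conditions so any buffered notifications get
// processed.
static void example_access_counters_suspend_resume(uvm_parent_gpu_t *parent_gpu)
{
    uvm_parent_gpu_access_counters_set_ignore(parent_gpu, true);

    // ... GPU is suspended and later resumed ...

    uvm_parent_gpu_access_counters_set_ignore(parent_gpu, false);
}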
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);
// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);
// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
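// A minimal sketch of the ref-counted enable/disable bracket described
// above, assuming a per-VA-space registration path; error handling and the
// actual registration logic are omitted. HW notifications are enabled by
// the first VA space to enable access counters and disabled again when the
// last one disables them. The VA space lock must not be held here.
static NV_STATUS example_va_space_use_access_counters(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
{
    NV_STATUS status = uvm_gpu_access_counters_enable(gpu, va_space);

    if (status != NV_OK)
        return status;

    // ... access counter notifications are serviced for this VA space ...

    uvm_gpu_access_counters_disable(gpu, va_space);

    return NV_OK;
}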
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_GPU_ACCESS_COUNTERS_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
return 1;
}
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
// On Volta, accessCntrBufferCount is > 0, but we don't support access
// counters in UVM (access_counters_supported is cleared during HAL
// initialization). This check prevents the top-half from accessing
// unallocated memory.
if (!parent_gpu->access_counters_supported)
return 0;
if (parent_gpu->isr.is_suspended)
return 0;
if (!parent_gpu->isr.access_counters.handling_ref_count)
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
return 0;
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
return 0;
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
return 0;
}
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
return 1;
}
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
// did, back to RM, via the return code:
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
// UVM is given a chance to handle the interrupt, before most of the RM
// processing. UVM communicates what it did, back to RM, via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
// UVM did not schedule a bottom half, because it was unable to get the
// locks it needed, but there is still UVM work to be done. RM will
// return "not handled" to the Linux kernel, *unless* RM handled other
// faults in its top half. In that case, the fact that UVM did not
// handle its interrupt is lost. However, life and interrupt processing
// continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining
// (GET != PUT in the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
// UVM did not find any work to do. Currently this is handled in RM in
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
// However, the extra precision is available for the future. RM's
// interrupt handling tends to evolve as new chips and new interrupts
// get created.
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status = NV_OK;
NvU32 i;
if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
// Now that we got a GPU object, lock it so that it can't be removed without
// us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
++parent_gpu->isr.interrupt_count;
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);
if (num_handlers_scheduled == 0) {
if (parent_gpu->isr.is_suspended)
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_va_block_context_t *block_context;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
return status;
}
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
return NV_ERR_NO_MEMORY;
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
// Access counter interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;
parent_gpu->isr.replayable_faults.handling = true;
@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;
parent_gpu->isr.non_replayable_faults.handling = true;
@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
NvU32 notif_buf_index;
UVM_ASSERT(index_count > 0);
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
return NV_ERR_NO_MEMORY;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
if (!parent_gpu->isr.access_counters)
return NV_ERR_NO_MEMORY;
for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK)
return status;
}
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}
@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
NvU32 notif_buf_index;
if (parent_gpu->isr.access_counters) {
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
"notif buf index: %u\n",
notif_buf_index);
}
}
// Now that the GPU is safely out of the global table, lock the GPU and mark
// it as no longer handling interrupts so the top half knows not to schedule
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
// It is safe to deinitialize access counters even if they have not
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
if (parent_gpu->isr.access_counters)
uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
}
uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
}
if (parent_gpu->non_replayable_faults_supported) {
block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
}
block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
unsigned int cpu;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
// concurrently, but only one per-notification-buffer (i.e.,
// notif_buf_index) can be running this function for a given GPU since we
// enter with the per-notification-buffer access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
put_cpu();
uvm_parent_gpu_service_access_counters(parent_gpu);
uvm_service_access_counters(access_counters);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
uvm_access_counters_isr_unlock(access_counters);
uvm_parent_gpu_kref_put(parent_gpu);
}
@@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
// clear_replayable_faults is a no-op for architectures that don't
// support pulse-based interrupts.
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// This unlock call has to be out-of-order unlock due to interrupts_lock
@@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
{
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(access_counters);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
}
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_access_counters_intr_enable(access_counters);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get);
}
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);
// This unlock call has to be out-of-order unlock due to interrupts_lock
// still being held. Otherwise, it would result in a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
@@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
// The read of handling_ref_count could race with a write from
@@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
// ISR lock. But those functions are invoked with the interrupt disabled
// (disable_intr_ref_count > 0), so the check always returns false when the
// race occurs
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
}
++parent_gpu->isr.access_counters.disable_intr_ref_count;
++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
}
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);
--parent_gpu->isr.access_counters.disable_intr_ref_count;
--parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
}
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
// Number of the bottom-half invocations for this interrupt on a GPU
// over its lifetime.
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
// Protects the state of interrupts (enabled/disabled) and whether the GPU
// is currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
uvm_intr_handler_t *access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom half
// without risking deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
// Number of top-half ISRs called for this GPU over its lifetime.
NvU64 interrupt_count;
} uvm_isr_info_t;
@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);
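// A sketch of the paired, ref-counted interrupt disable/enable contract
// described above; the work done in between is a placeholder. Both calls
// must be made with parent_gpu->isr.interrupts_lock held, and for
// pulse-based interrupts the caller is still responsible for re-arming.
static void example_access_counters_quiesce(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
    uvm_access_counters_intr_disable(access_counters);
    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

    // ... work that must not race with new access counter interrupts ...

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
    uvm_access_counters_intr_enable(access_counters);
    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}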
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -119,18 +119,18 @@
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
non_replayable_faults->shadow_buffer_copy =
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
if (!non_replayable_faults->shadow_buffer_copy)
return NV_ERR_NO_MEMORY;
@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
if (non_replayable_faults->fault_cache) {
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
&has_pending_faults);
UVM_ASSERT(status == NV_OK);
@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
NV_STATUS status;
NvU32 i;
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
current_hw_entry,
cached_faults);
@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
uvm_gpu_t *gpu = user_channel->gpu;
NV_STATUS status;
uvm_push_t push;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
const uvm_va_policy_t *policy;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
service_context,
hmm_migratable));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
&va_block->tracker);
uvm_mutex_unlock(&va_block->lock);
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
{
uvm_va_space_t *va_space = fault_entry->va_space;
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
struct mm_struct *mm;
uvm_gpu_va_space_t *gpu_va_space;
uvm_gpu_t *gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@@ -757,7 +757,7 @@ exit_no_channel:
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
{
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
&parent_gpu->fault_buffer.non_replayable.block_service_context;
NV_STATUS status;
bool hmm_migratable = true;
@@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
for (i = 0; i < cached_faults; ++i) {
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
if (status != NV_OK)
return;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
// the power management resume path.
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
// Read the current get/put pointers, as this might not be the first time
// we take control of the fault buffer since the GPU was initialized,
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
// (Re-)enable fault prefetching
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
else
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);
replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
// Check provided module parameter value
parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
replayable_faults->max_faults);
parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
replayable_faults->max_faults);
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer_info.max_batch_size);
if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer.max_batch_size);
}
batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
}
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
}
// Re-enable fault prefetching just in case it was disabled in a previous run
parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
fault_buffer_reinit_replayable_faults(parent_gpu);
@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
if (batch_context->fault_cache) {
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
uvm_tracker_deinit(&replayable_faults->replay_tracker);
}
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
// Re-enable prefetch faults in case we disabled them
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
}
@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->replayable_faults_supported);
status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
nvstatusToString(status),
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
// when it returns an error. Set the buffer handle to zero as it is
// used by the deinitialization logic to determine if it was correctly
// initialized.
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
goto fail;
}
@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
fault_buffer_deinit_replayable_faults(parent_gpu);
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
UVM_ASSERT(status == NV_OK);
uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
}
}
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
// Fast path 1: we left some faults unserviced in the buffer in the last pass
// Fast path 1: we left some faults unserviced in the buffer in the last
// pass
if (replayable_faults->cached_get != replayable_faults->cached_put)
return true;
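
The fast path above is a ring-buffer pending check: faults remain to be serviced whenever the cached consumer index (get) has not caught up with the cached producer index (put). A minimal standalone sketch of the same comparison follows; the type and field names are illustrative only and are not the driver's actual definitions.

#include <stdbool.h>
#include <stdint.h>

struct fault_ring {
    uint32_t cached_get;   // next entry the consumer will read
    uint32_t cached_put;   // next entry the producer will write
    uint32_t max_faults;   // ring capacity; both indices wrap at this value
};

// Entries are pending whenever the consumer has not caught up with the
// producer; equality means the buffer was fully drained in the last pass.
bool ring_has_pending(const struct fault_ring *ring)
{
    return ring->cached_get != ring->cached_put;
}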
@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;
UVM_ASSERT(tracker != NULL);
@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_gpu_phys_address_t pdb;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
NvU64 offset;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_tracker_t *tracker = NULL;
if (batch_context)
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
return NV_OK;
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
NV_STATUS status;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault_cache;
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
@@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
// Parse until get != put and have enough space to cache.
while ((get != put) &&
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
bool is_same_instance_ptr = true;
uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
uvm_fault_utlb_info_t *current_tlb;
@@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_page_index_t last_page_index;
NvU32 page_fault_count = 0;
uvm_range_group_range_iter_t iter;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
NV_STATUS status;
uvm_va_block_retry_t va_block_retry;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
uvm_gpu_t *gpu = gpu_va_space->gpu;
bool replay_per_va_block =
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
UVM_ASSERT(vma);
@@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
// Do not check for coalesced access type. If there are multiple different
// accesses to an address, we can disregard the prefetch one.
// Do not check for coalesced access type. If there are multiple
// different accesses to an address, we can disregard the prefetch one.
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
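
The check above treats a fault as prefetch-only when PREFETCH is the strongest access type present in the mask, so a prefetch coalesced with a read, write, or atomic access is simply disregarded. Below is a standalone sketch of that idea using its own illustrative ordering; the driver's actual enum values and mask helper are not reproduced here.

#include <stdbool.h>
#include <stdint.h>

// Illustrative ordering only: a higher bit position means a stronger access.
enum access_type {
    ACCESS_TYPE_PREFETCH = 0,
    ACCESS_TYPE_READ,
    ACCESS_TYPE_WRITE,
    ACCESS_TYPE_ATOMIC,
};

// Strongest access type present in a non-empty bitmask of access types.
enum access_type mask_highest(uint32_t access_type_mask)
{
    return (enum access_type)(31 - __builtin_clz(access_type_mask));
}

// Prefetch-only: no read, write, or atomic access accompanies the prefetch.
bool is_prefetch_only_fault(uint32_t access_type_mask)
{
    return mask_highest(access_type_mask) == ACCESS_TYPE_PREFETCH;
}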
@@ -1956,7 +1957,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
gpu->parent->fault_buffer.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;
@@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
NvU64 outer = ~0ULL;
UVM_ASSERT(replay_per_va_block ==
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
// Limit outer to the minimum of next va_range.start and first
// fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
uvm_gpu_t *gpu = batch_context->fatal_gpu;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
UVM_ASSERT(va_space);
@@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
NvU32 block_faults;
const bool hmm_migratable = true;
@@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
NvU32 i;
uvm_va_space_t *va_space = NULL;
uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
struct mm_struct *mm = NULL;
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.replayable.block_service_context;
&parent_gpu->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
bool hmm_migratable = true;
@@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
// fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
// try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
NV_STATUS status;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
bool first = true;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
// 1) Disable prefetching to keep new requests from coming in and flooding
//    the buffer
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);
while (1) {
@@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
}
// 10) Re-enable prefetching
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);
if (status == NV_OK)
@@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
// comment in mark_fault_invalid_prefetch(..).
// Some tests rely on this logic (and ratio) to correctly disable prefetch
// fault reporting. If the logic changes, the tests will have to be changed.
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
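
The heuristic above disables prefetch fault reporting once invalid prefetch faults exceed two thirds of a batch (num_invalid_prefetch_faults * 3 > max_batch_size * 2) and re-enables it once the configured millisecond lapse has passed; the 1000 * 1000 scaling suggests the timestamps are in nanoseconds. A standalone sketch of that logic follows, with illustrative names rather than the driver's API.

#include <stdbool.h>
#include <stdint.h>

// Disable once invalid prefetch faults exceed 2/3 of the fault batch size.
bool should_disable_prefetch(uint32_t num_invalid_prefetch_faults, uint32_t max_batch_size)
{
    return num_invalid_prefetch_faults * 3 > max_batch_size * 2;
}

// Re-enable once the configured lapse (in milliseconds) has elapsed,
// assuming timestamps are reported in nanoseconds.
bool should_reenable_prefetch(uint64_t now_ns, uint64_t disabled_at_ns, uint64_t lapse_msec)
{
    return (now_ns - disabled_at_ns) > lapse_msec * 1000u * 1000u;
}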
@@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
NvU32 num_batches = 0;
NvU32 num_throttled = 0;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
parent_gpu->fault_buffer.prefetch_faults_enabled = true;
}
}
@@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
parent_gpu->fault_buffer.prefetch_faults_enabled = false;
parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
}
}

View File

@@ -217,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
@@ -254,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
.replay_faults = uvm_hal_volta_replay_faults,
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
}
},
@@ -271,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
}
},
{
@@ -537,22 +535,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
}
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
.parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
}
},
{
@@ -843,10 +838,8 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
// Computing.
//
// TODO: Bug 200692962: Add support for access counters in vGPU
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
}
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -1042,36 +1035,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
}
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
{
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
switch (access_counter_type) {
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
}
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->ve_id);
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
UVM_DBG_PRINT(" bank %u\n", entry->bank);

View File

@@ -686,54 +686,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
// The source and destination addresses must be 16-byte aligned. Note that the
// best performance is achieved with 256-byte alignment. The decrypt size must
@@ -786,7 +784,6 @@ struct uvm_host_hal_struct
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_get_time_t get_time;
};

View File

@@ -471,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
return max(membar_1, membar_2);
}
typedef enum
{
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_ACCESS_COUNTER_TYPE_MOMC,
UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
struct uvm_access_counter_buffer_entry_struct
{
// Whether this counter refers to outbound accesses to remote GPUs or
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
uvm_access_counter_type_t counter_type;
// Address of the region for which a notification was sent
uvm_gpu_address_t address;
NvU64 address;
union
{
// These fields are only valid if address.is_virtual is true
struct
{
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
uvm_mmu_engine_type_t mmu_engine_type;
uvm_mmu_engine_type_t mmu_engine_type;
NvU32 mmu_engine_id;
NvU32 mmu_engine_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
} virtual_info;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
// These fields are only valid if address.is_virtual is false
struct
{
// Processor id where data is resident
//
// Although this information is not tied to a VA space, we can use
// a regular processor id because P2P is not allowed between
// partitioned GPUs.
uvm_processor_id_t resident_id;
} physical_info;
};
// This is the GPU that triggered the notification. Note that physical
// address based notifications are only supported on non-MIG-capable GPUs.
// This is the GPU that triggered the notification.
uvm_gpu_t *gpu;
// Number of times the tracked region was accessed since the last time it
// was cleared. Counter values saturate at the maximum value supported by
// the GPU (2^16 - 1 in Volta)
// the GPU (2^16 - 1 on Turing)
NvU32 counter_value;
// When the granularity of the tracked regions is greater than 64KB, the

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -1602,7 +1602,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
return status;
}
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
uvm_cpu_chunk_free(chunk);

View File

@@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Hopper covers 64 PB and that's the minimum
@@ -99,8 +97,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
return;
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
" insmod with uvm_leak_checker=2 for detailed information." :
"");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
if (g_uvm_global.unload_state.ptr)
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
UVM_INFO_PRINT(" Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
// Free so we don't keep eating up memory while debugging. Note that
// this also removes the entry from the table, frees info, and drops

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,12 +27,13 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -69,6 +69,17 @@
//
// This should be taken whenever global GPU state might need to be modified.
//
// - Access counters VA space enablement state lock
// Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
// Exclusive lock (mutex)
//
// This protects VA space state associated with access counters enablement.
// Blackwell+ GPUs may have multiple access counters notification buffers
// and their "atomic" enablement is protected by this lock.
//
// This should be taken whenever VA space access counters state might need
// to be modified.
//
// - GPU ISR lock
// Order: UVM_LOCK_ORDER_ISR
// Exclusive lock (mutex) per gpu
@@ -487,6 +498,7 @@ typedef enum
UVM_LOCK_ORDER_INVALID = 0,
UVM_LOCK_ORDER_GLOBAL_PM,
UVM_LOCK_ORDER_GLOBAL,
UVM_LOCK_ORDER_ACCESS_COUNTERS,
UVM_LOCK_ORDER_ISR,
UVM_LOCK_ORDER_MMAP_LOCK,
UVM_LOCK_ORDER_VA_SPACES_LIST,
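
The new UVM_LOCK_ORDER_ACCESS_COUNTERS order sits between the global lock and the ISR lock, matching the documentation added above. Below is a minimal sketch of how a mutex at this order might be declared and taken, assuming the uvm_mutex_t type and the uvm_mutex_* helpers used elsewhere in this commit; the structure and function names are hypothetical, not the driver's actual code.

// Hypothetical per-VA-space state guarding access counters enablement.
typedef struct
{
    uvm_mutex_t access_counters_enablement_lock;
} example_access_counters_state_t;

void example_access_counters_state_init(example_access_counters_state_t *state)
{
    // Register the mutex at the new lock order so lock tracking can verify it
    // is taken after the global lock and before the GPU ISR lock.
    uvm_mutex_init(&state->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
}

void example_access_counters_toggle(example_access_counters_state_t *state)
{
    uvm_mutex_lock(&state->access_counters_enablement_lock);

    // Enable or disable all notification buffers for the VA space "atomically"
    // with respect to other enablement changes.

    uvm_mutex_unlock(&state->access_counters_enablement_lock);
}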
@@ -742,7 +754,8 @@ bool __uvm_locking_initialized(void);
ret; \
})
// Helper for calling a UVM-RM interface function that returns void with lock recording
// Helper for calling a UVM-RM interface function that returns void with lock
// recording
#define uvm_rm_locked_call_void(call) ({ \
uvm_record_lock_rm_all(); \
call; \

View File

@@ -63,8 +63,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -24,25 +24,29 @@
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT_MSG(false,
"enable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT_MSG(false,
"disable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get)
{
UVM_ASSERT_MSG(false,
"clear_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
@@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
return 0;
}
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
return false;
}
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -330,11 +330,6 @@ void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{

View File

@@ -582,7 +582,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
managed_range_last = managed_range;
// For UVM-Lite GPUs, the CUDA driver may suballocate a single
// managed_range into many range groups. For this reason, we iterate
// managed_range into many range groups. For this reason, we iterate
// over each managed_range first then through the range groups within.
uvm_range_group_for_each_migratability_in(&iter,
va_space,
@@ -865,9 +865,9 @@ NV_STATUS uvm_migrate_init(void)
else {
g_uvm_perf_migrate_cpu_preunmap_size = UVM_VA_BLOCK_SIZE << UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT;
pr_info("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
uvm_perf_migrate_cpu_preunmap_block_order,
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
uvm_perf_migrate_cpu_preunmap_block_order,
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
}
}
@@ -909,14 +909,13 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
UVM_INFO_PRINT("TEMP\n");
return NV_ERR_INVALID_ARGUMENT;
}
gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
if (!gpus_to_check_for_nvlink_errors)
return NV_ERR_NO_MEMORY;
uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
// mmap_lock will be needed if we have to create CPU mappings

View File

@@ -90,9 +90,9 @@ NV_STATUS uvm_mmu_init(void)
page_table_aperture = UVM_APERTURE_SYS;
}
else {
pr_info("Invalid uvm_page_table_location %s. Using %s instead.\n",
uvm_page_table_location,
uvm_aperture_string(page_table_aperture));
UVM_INFO_PRINT("Invalid uvm_page_table_location %s. Using %s instead.\n",
uvm_page_table_location,
uvm_aperture_string(page_table_aperture));
}
return NV_OK;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -40,10 +40,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_pascal_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Pascal covers 128 TB and that's the minimum
@@ -92,8 +92,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,8 +44,8 @@ void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnSet;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnSet;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
@@ -55,33 +55,33 @@ void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
UVM_ASSERT(put < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
UVM_ASSERT(put < parent_gpu->fault_buffer.replayable.max_faults);
return put;
}
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
return get;
}
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, index);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, index);
}
static uvm_fault_access_type_t get_fault_access_type(const NvU32 *fault_entry)
@@ -189,9 +189,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
fault_buffer_entry_b069_t *buffer_start;
NvU32 *fault_entry;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
fault_entry = (NvU32 *)&buffer_start[index];
return fault_entry;
@@ -205,10 +205,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
{
UvmFaultMetadataPacket *fault_entry_metadata;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
UVM_ASSERT(fault_entry_metadata != NULL);
return fault_entry_metadata + index;
@@ -267,7 +267,7 @@ NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *p
// Compute global uTLB id
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
buffer_entry->fault_source.utlb_id = utlb_id;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -21,7 +21,6 @@
*******************************************************************************/
// For Pascal, UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth HW level VA bits
@@ -377,7 +376,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
{
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer.rm_info.replayable.pPrefetchCtrl;
// A null prefetch control mapping indicates that UVM should toggle the
// register's value using the RM API, instead of performing a direct access.
@@ -388,7 +387,7 @@ static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
// Computing.
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer_info.rm_info, (NvBool)enable);
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer.rm_info, (NvBool)enable);
UVM_ASSERT(status == NV_OK);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -512,8 +512,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
}
else {
pr_info("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
uvm_perf_prefetch_threshold, UVM_PREFETCH_THRESHOLD_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
uvm_perf_prefetch_threshold,
UVM_PREFETCH_THRESHOLD_DEFAULT);
g_uvm_perf_prefetch_threshold = UVM_PREFETCH_THRESHOLD_DEFAULT;
}
@@ -523,8 +524,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
g_uvm_perf_prefetch_min_faults = uvm_perf_prefetch_min_faults;
}
else {
pr_info("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
uvm_perf_prefetch_min_faults, UVM_PREFETCH_MIN_FAULTS_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
uvm_perf_prefetch_min_faults,
UVM_PREFETCH_MIN_FAULTS_DEFAULT);
g_uvm_perf_prefetch_min_faults = UVM_PREFETCH_MIN_FAULTS_DEFAULT;
}

View File

@@ -338,28 +338,28 @@ static unsigned g_uvm_perf_thrashing_max_resets;
// parameter _d. The user value is read from _v, and the final value is stored
// in a variable named g_##_v, so it must be declared, too. Only unsigned
// parameters are supported.
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
do { \
unsigned v = (_v); \
unsigned d = (_d); \
unsigned mi = (_mi); \
unsigned ma = (_ma); \
\
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
\
UVM_ASSERT(mi <= ma); \
UVM_ASSERT(d >= mi); \
UVM_ASSERT(d <= ma); \
\
if (v >= mi && v <= ma) { \
g_##_v = v; \
} \
else { \
pr_info("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
\
g_##_v = d; \
} \
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
do { \
unsigned v = (_v); \
unsigned d = (_d); \
unsigned mi = (_mi); \
unsigned ma = (_ma); \
\
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
\
UVM_ASSERT(mi <= ma); \
UVM_ASSERT(d >= mi); \
UVM_ASSERT(d <= ma); \
\
if (v >= mi && v <= ma) { \
g_##_v = v; \
} \
else { \
UVM_INFO_PRINT("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
\
g_##_v = d; \
} \
} while (0)
#define INIT_THRASHING_PARAMETER(v, d) INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, UINT_MAX)
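
For reference, a hedged usage sketch of the macro above: the module parameter name below is hypothetical, but the pattern follows the contract stated in the comment (an unsigned module parameter plus a matching g_-prefixed global, clamped to [min, max] with a fallback to the default and a UVM_INFO_PRINT warning).

// Hypothetical module parameter; real thrashing parameters follow this shape.
static unsigned uvm_perf_thrashing_example = 16;
module_param(uvm_perf_thrashing_example, uint, S_IRUGO);

// The macro stores the validated value in the matching g_-prefixed global.
static unsigned g_uvm_perf_thrashing_example;

static void example_init_thrashing_parameters(void)
{
    // Accept values in [1, 1024]; out-of-range values warn and fall back to 16.
    INIT_THRASHING_PARAMETER_MIN_MAX(uvm_perf_thrashing_example, 16u, 1u, 1024u);
}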

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -31,21 +31,14 @@ static int uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
module_param(uvm_cpu_chunk_allocation_sizes, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(uvm_cpu_chunk_allocation_sizes, "OR'ed value of all CPU chunk allocation sizes.");
static struct kmem_cache *g_reverse_page_map_cache __read_mostly;
NV_STATUS uvm_pmm_sysmem_init(void)
{
g_reverse_page_map_cache = NV_KMEM_CACHE_CREATE("uvm_pmm_sysmem_page_reverse_map_t",
uvm_reverse_map_t);
if (!g_reverse_page_map_cache)
return NV_ERR_NO_MEMORY;
// Ensure that only supported CPU chunk sizes are enabled.
uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
UVM_INFO_PRINT("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
}
@@ -54,387 +47,11 @@ NV_STATUS uvm_pmm_sysmem_init(void)
void uvm_pmm_sysmem_exit(void)
{
kmem_cache_destroy_safe(&g_reverse_page_map_cache);
}
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings)
{
memset(sysmem_mappings, 0, sizeof(*sysmem_mappings));
sysmem_mappings->gpu = gpu;
uvm_mutex_init(&sysmem_mappings->reverse_map_lock, UVM_LOCK_ORDER_LEAF);
uvm_init_radix_tree_preloadable(&sysmem_mappings->reverse_map_tree);
return NV_OK;
}
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings)
{
if (sysmem_mappings->gpu) {
UVM_ASSERT_MSG(radix_tree_empty(&sysmem_mappings->reverse_map_tree),
"radix_tree not empty for GPU %s\n",
uvm_gpu_name(sysmem_mappings->gpu));
}
sysmem_mappings->gpu = NULL;
}
// TODO: Bug 1995015: use a more efficient data structure for
// physically-contiguous allocations.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner)
{
NV_STATUS status = NV_OK;
uvm_reverse_map_t *new_reverse_map;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const NvU32 num_pages = region_size / PAGE_SIZE;
uvm_page_index_t page_index;
UVM_ASSERT(va_block);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
UVM_ASSERT(IS_ALIGNED(dma_addr, region_size));
UVM_ASSERT(IS_ALIGNED(virt_addr, region_size));
UVM_ASSERT(region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(region_size));
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr));
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr + region_size - 1));
uvm_assert_mutex_locked(&va_block->lock);
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return NV_OK;
new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
if (!new_reverse_map)
return NV_ERR_NO_MEMORY;
page_index = uvm_va_block_cpu_page_index(va_block, virt_addr);
new_reverse_map->va_block = va_block;
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
new_reverse_map->owner = owner;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
for (key = base_key; key < base_key + num_pages; ++key) {
int ret = radix_tree_insert(&sysmem_mappings->reverse_map_tree, key, new_reverse_map);
if (ret != 0) {
NvU64 remove_key;
for (remove_key = base_key; remove_key < key; ++remove_key)
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
status = errno_to_nv_status(ret);
break;
}
}
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
// The assert is added for Coverity's sake. It is equivalent to adding
// assert(num_pages > 0) before the loop, but Coverity cannot deduce from
// num_pages > 0 alone that the loop must execute at least once.
UVM_ASSERT(key != base_key || status != NV_OK);
return status;
}
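A minimal user-space sketch of the keying scheme used above, assuming a 4K base page size and hypothetical addresses: each PAGE_SIZE page of the DMA region gets its own key, dma_addr / PAGE_SIZE + n, and every key's radix-tree slot points at the same reverse-map descriptor. This is an illustrative model, not driver code.

#include <stdio.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL /* assumed 4K base page size */

int main(void)
{
    /* Hypothetical PAGE_SIZE-aligned, physically-contiguous 64K DMA region */
    uint64_t dma_addr    = 0x200000ULL;
    uint64_t region_size = 0x10000ULL;

    uint64_t base_key  = dma_addr / SKETCH_PAGE_SIZE;
    uint64_t num_pages = region_size / SKETCH_PAGE_SIZE;
    uint64_t key;

    /* The driver inserts one radix-tree slot per key in
     * [base_key, base_key + num_pages); every slot points at the same
     * reverse-map descriptor for the whole region. */
    for (key = base_key; key < base_key + num_pages; ++key)
        printf("key 0x%llx -> shared reverse-map descriptor\n",
               (unsigned long long)key);

    return 0;
}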
static void pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
bool check_mapping)
{
uvm_reverse_map_t *reverse_map;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, base_key);
if (check_mapping)
UVM_ASSERT(reverse_map);
if (!reverse_map) {
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
return;
}
uvm_assert_mutex_locked(&reverse_map->va_block->lock);
for (key = base_key + 1; key < base_key + uvm_va_block_region_num_pages(reverse_map->region); ++key) {
uvm_reverse_map_t *curr_reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, key);
UVM_ASSERT(curr_reverse_map == reverse_map);
}
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
}
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, true);
}
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, false);
}
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block)
{
NvU64 virt_addr;
uvm_reverse_map_t *reverse_map;
const NvU64 base_key = dma_addr / PAGE_SIZE;
uvm_page_index_t new_start_page;
UVM_ASSERT(PAGE_ALIGNED(dma_addr));
UVM_ASSERT(va_block);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
UVM_ASSERT(reverse_map);
// Compute the virtual address by hand, since the old VA block may be in an
// inconsistent state during a split
virt_addr = reverse_map->va_block->start + reverse_map->region.first * PAGE_SIZE;
new_start_page = uvm_va_block_cpu_page_index(va_block, virt_addr);
reverse_map->region = uvm_va_block_region(new_start_page,
new_start_page + uvm_va_block_region_num_pages(reverse_map->region));
reverse_map->va_block = va_block;
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_map)));
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_map)));
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
}
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size)
{
uvm_reverse_map_t *orig_reverse_map;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const size_t num_pages = new_region_size / PAGE_SIZE;
size_t old_num_pages;
size_t subregion, num_subregions;
uvm_reverse_map_t **new_reverse_maps;
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(new_region_size));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return NV_OK;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
orig_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
// We can access orig_reverse_map outside the tree lock because we hold the
// VA block lock, so there can be no concurrent modifications in the tree for
// the mappings of the chunks that belong to that VA block.
UVM_ASSERT(orig_reverse_map);
UVM_ASSERT(orig_reverse_map->va_block);
uvm_assert_mutex_locked(&orig_reverse_map->va_block->lock);
old_num_pages = uvm_va_block_region_num_pages(orig_reverse_map->region);
UVM_ASSERT(num_pages < old_num_pages);
num_subregions = old_num_pages / num_pages;
new_reverse_maps = uvm_kvmalloc_zero(sizeof(*new_reverse_maps) * (num_subregions - 1));
if (!new_reverse_maps)
return NV_ERR_NO_MEMORY;
// Allocate the descriptors for the new subregions
for (subregion = 1; subregion < num_subregions; ++subregion) {
uvm_reverse_map_t *new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
uvm_page_index_t page_index = orig_reverse_map->region.first + num_pages * subregion;
if (new_reverse_map == NULL) {
// On error, free the previously-created descriptors
while (--subregion != 0)
kmem_cache_free(g_reverse_page_map_cache, new_reverse_maps[subregion - 1]);
uvm_kvfree(new_reverse_maps);
return NV_ERR_NO_MEMORY;
}
new_reverse_map->va_block = orig_reverse_map->va_block;
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
new_reverse_map->owner = orig_reverse_map->owner;
new_reverse_maps[subregion - 1] = new_reverse_map;
}
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
for (subregion = 1; subregion < num_subregions; ++subregion) {
NvU64 key;
for (key = base_key + num_pages * subregion; key < base_key + num_pages * (subregion + 1); ++key) {
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
UVM_ASSERT(slot);
UVM_ASSERT(radix_tree_deref_slot(slot) == orig_reverse_map);
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, new_reverse_maps[subregion - 1]);
}
}
orig_reverse_map->region = uvm_va_block_region(orig_reverse_map->region.first,
orig_reverse_map->region.first + num_pages);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
uvm_kvfree(new_reverse_maps);
return NV_OK;
}
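A minimal sketch of the split arithmetic above, with hypothetical sizes (a 2M mapping split into 64K subregions): subregion 0 keeps the original descriptor with a shrunk region, while each later subregion gets a freshly allocated descriptor whose slots replace the original one in the tree. Not driver code.

#include <stdio.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL /* assumed 4K base page size */

int main(void)
{
    /* Hypothetical: a 2M mapping split into 64K subregions */
    uint64_t old_region_size = 0x200000ULL;
    uint64_t new_region_size = 0x10000ULL;

    uint64_t num_pages      = new_region_size / SKETCH_PAGE_SIZE;
    uint64_t old_num_pages  = old_region_size / SKETCH_PAGE_SIZE;
    uint64_t num_subregions = old_num_pages / num_pages;
    uint64_t subregion;

    /* Subregion 0 reuses the original descriptor (only its region is
     * shrunk); subregions 1..num_subregions-1 get new descriptors and
     * their radix-tree slots are re-pointed at them. */
    for (subregion = 0; subregion < num_subregions; ++subregion)
        printf("subregion %llu covers pages [%llu, %llu)\n",
               (unsigned long long)subregion,
               (unsigned long long)(subregion * num_pages),
               (unsigned long long)((subregion + 1) * num_pages));

    return 0;
}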
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size)
{
uvm_reverse_map_t *first_reverse_map;
uvm_page_index_t running_page_index;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const size_t num_pages = new_region_size / PAGE_SIZE;
size_t num_mapping_pages;
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(new_region_size));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
// Find the first mapping in the region
first_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
UVM_ASSERT(first_reverse_map);
num_mapping_pages = uvm_va_block_region_num_pages(first_reverse_map->region);
UVM_ASSERT(num_pages >= num_mapping_pages);
UVM_ASSERT(IS_ALIGNED(base_key, num_mapping_pages));
// If the region in the tree already matches the merged region's size, just return
if (num_pages == num_mapping_pages)
goto unlock_no_update;
// Otherwise, update the rest of the slots to point at the same reverse map
// descriptor
key = base_key + uvm_va_block_region_num_pages(first_reverse_map->region);
running_page_index = first_reverse_map->region.outer;
while (key < base_key + num_pages) {
uvm_reverse_map_t *reverse_map = NULL;
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
size_t slot_index;
UVM_ASSERT(slot);
reverse_map = radix_tree_deref_slot(slot);
UVM_ASSERT(reverse_map);
UVM_ASSERT(reverse_map != first_reverse_map);
UVM_ASSERT(reverse_map->va_block == first_reverse_map->va_block);
UVM_ASSERT(uvm_id_equal(reverse_map->owner, first_reverse_map->owner));
UVM_ASSERT(reverse_map->region.first == running_page_index);
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
num_mapping_pages = uvm_va_block_region_num_pages(reverse_map->region);
UVM_ASSERT(IS_ALIGNED(key, num_mapping_pages));
UVM_ASSERT(key + num_mapping_pages <= base_key + num_pages);
for (slot_index = 1; slot_index < num_mapping_pages; ++slot_index) {
slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key + slot_index);
UVM_ASSERT(slot);
UVM_ASSERT(reverse_map == radix_tree_deref_slot(slot));
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
}
key += num_mapping_pages;
running_page_index = reverse_map->region.outer;
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
}
// Grow the first mapping to cover the whole region
first_reverse_map->region.outer = first_reverse_map->region.first + num_pages;
unlock_no_update:
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
}
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings)
{
NvU64 key;
size_t num_mappings = 0;
const NvU64 base_key = dma_addr / PAGE_SIZE;
NvU32 num_pages = region_size / PAGE_SIZE;
UVM_ASSERT(region_size >= PAGE_SIZE);
UVM_ASSERT(PAGE_ALIGNED(region_size));
UVM_ASSERT(sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses);
UVM_ASSERT(max_out_mappings > 0);
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
key = base_key;
do {
uvm_reverse_map_t *reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, key);
if (reverse_map) {
size_t num_chunk_pages = uvm_va_block_region_num_pages(reverse_map->region);
NvU32 page_offset = key & (num_chunk_pages - 1);
NvU32 num_mapping_pages = min(num_pages, (NvU32)num_chunk_pages - page_offset);
// Sysmem mappings are removed during VA block destruction.
// Therefore, we can safely retain the VA blocks as long as they
// are in the reverse map and we hold the reverse map lock.
uvm_va_block_retain(reverse_map->va_block);
out_mappings[num_mappings] = *reverse_map;
out_mappings[num_mappings].region.first += page_offset;
out_mappings[num_mappings].region.outer = out_mappings[num_mappings].region.first + num_mapping_pages;
if (++num_mappings == max_out_mappings)
break;
num_pages -= num_mapping_pages;
key += num_mapping_pages;
}
else {
--num_pages;
++key;
}
}
while (num_pages > 0);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
return num_mappings;
}
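A small stand-alone model of the offset computation above, with made-up numbers: because every mapping covers a power-of-two number of pages and its base key is size-aligned, key & (num_chunk_pages - 1) yields the page offset inside the mapping, and the translation returns at most the remaining pages of that mapping or of the queried range, whichever is smaller.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical lookup that lands in the middle of a 64K (16-page)
     * chunk mapping whose base key is 16-page aligned. */
    uint64_t key             = 0x213ULL; /* looked-up page key */
    uint64_t num_chunk_pages = 16ULL;    /* pages covered by the mapping */
    uint64_t num_pages       = 8ULL;     /* pages left in the query range */

    /* Power-of-two size plus a size-aligned base key means the low bits
     * of the key are the page offset inside the chunk. */
    uint64_t page_offset       = key & (num_chunk_pages - 1);
    uint64_t num_mapping_pages = num_chunk_pages - page_offset;

    if (num_mapping_pages > num_pages)
        num_mapping_pages = num_pages;

    printf("offset %llu, returning %llu page(s) from this mapping\n",
           (unsigned long long)page_offset,
           (unsigned long long)num_mapping_pages);

    return 0;
}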
uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void)
{
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
}
static void uvm_cpu_chunk_set_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -30,96 +30,12 @@
#include "uvm_lock.h"
#include "uvm_pmm_gpu.h"
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
// This is required by the GPU access counters feature since they may provide a
// physical address in the notification packet (GPA notifications). We use the
// table to obtain the VAs of the memory regions being accessed remotely. The
// reverse map is implemented by a radix tree, which is indexed using the
// DMA address. For now, only PAGE_SIZE translations are supported (i.e. no
// big/huge pages).
//
// TODO: Bug 1995015: add support for physically-contiguous mappings.
struct uvm_pmm_sysmem_mappings_struct
{
uvm_gpu_t *gpu;
struct radix_tree_root reverse_map_tree;
uvm_mutex_t reverse_map_lock;
};
// Global initialization/exit functions, that need to be called during driver
// initialization/tear-down. These are needed to allocate/free global internal
// data structures.
NV_STATUS uvm_pmm_sysmem_init(void);
void uvm_pmm_sysmem_exit(void);
// Initialize per-GPU sysmem mapping tracking
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// Destroy per-GPU sysmem mapping tracking. The caller must ensure that all the
// mappings have been removed before calling this function.
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// dma_addr -> {va_block, virt_addr} mapping is inserted in the reverse map.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// entries for the physical region starting at dma_addr are removed from the
// reverse map.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// Like uvm_pmm_sysmem_mappings_remove_gpu_mapping but it doesn't assert if the
// mapping doesn't exist. See uvm_va_block_evict_chunks for more information.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is updated with va_block.
// This is required on VA block split.
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is split into regions of
// new_region_size. new_region_size must be a power of two and smaller than the
// previously-registered size.
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// If the GPU used to initialize sysmem_mappings supports access counters, all
// the mappings within the region [dma_addr, dma_addr + new_region_size) are
// merged into a single mapping. new_region_size must be a power of two. The
// whole region must be previously populated with mappings and all of them must
// have the same VA block and processor owner.
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// Obtain the {va_block, virt_addr} information for the mappings in the given
// [dma_addr:dma_addr + region_size) range. dma_addr and region_size must be
// page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). max_out_mappings are written, at most. The caller is required to
// provide enough entries in out_mappings.
//
// The VA block in each returned translation entry is retained, and it's up to
// the caller to release it.
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings);
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
typedef enum
@@ -425,9 +341,9 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
struct file *filp)
struct file *filp)
{
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
return NV_OK;
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
return NV_OK;
}
#endif

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -35,544 +35,6 @@
#include "uvm_push.h"
#include "uvm_processors.h"
// Pre-allocated array used for dma-to-virt translations
static uvm_reverse_map_t g_sysmem_translations[PAGES_PER_UVM_VA_BLOCK];
// We use our own separate reverse map to easily specify contiguous DMA
// address ranges
static uvm_pmm_sysmem_mappings_t g_reverse_map;
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using one call per entry. The returned
// virtual addresses must belong to va_block. The function assumes a 1:1
// dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_page(uvm_va_block_t *va_block,
NvU64 base_dma_addr,
const uvm_page_mask_t *page_mask)
{
uvm_page_index_t page_index;
for_each_va_block_page(page_index, va_block) {
size_t num_pages;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr + page_index * PAGE_SIZE,
PAGE_SIZE,
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
if (!page_mask || uvm_page_mask_test(page_mask, page_index)) {
TEST_CHECK_RET(num_pages == 1);
TEST_CHECK_RET(g_sysmem_translations[0].va_block == va_block);
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
TEST_CHECK_RET(uvm_reverse_map_start(&g_sysmem_translations[0]) == uvm_va_block_cpu_page_address(va_block, page_index));
TEST_CHECK_RET(uvm_va_block_region_num_pages(g_sysmem_translations[0].region) == 1);
TEST_CHECK_RET(UVM_ID_IS_CPU(g_sysmem_translations[0].owner));
uvm_va_block_release(g_sysmem_translations[0].va_block);
}
else {
TEST_CHECK_RET(num_pages == 0);
}
}
return NV_OK;
}
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to va_block. The function assumes a
// 1:1 dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_batch(uvm_va_block_t *va_block,
NvU64 base_dma_addr,
const uvm_page_mask_t *page_mask)
{
size_t num_translations;
size_t num_pages;
size_t reverse_map_index;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr,
uvm_va_block_size(va_block),
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
if (num_translations == 0 && page_mask)
TEST_CHECK_RET(uvm_page_mask_empty(page_mask));
num_pages = 0;
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
num_pages += num_reverse_map_pages;
TEST_CHECK_RET(reverse_map->va_block == va_block);
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
uvm_va_block_release(reverse_map->va_block);
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
}
if (page_mask)
TEST_CHECK_RET(num_pages == uvm_page_mask_weight(page_mask));
else
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block));
return NV_OK;
}
// Check that the DMA addresses for all the CPU pages of the two given VA blocks
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to one of the blocks. The function
// assumes a 1:1 dma-to-virt mapping for each VA block and that va_block1 is
// mapped behind va_block0.
static NV_STATUS check_reverse_map_two_blocks_batch(NvU64 base_dma_addr,
uvm_va_block_t *va_block0,
uvm_va_block_t *va_block1)
{
size_t num_pages;
size_t num_translations;
size_t reverse_map_index;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr,
UVM_VA_BLOCK_SIZE,
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
TEST_CHECK_RET(num_translations == 2);
num_pages = 0;
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
uvm_va_block_t *block;
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
NvU64 virt_addr = uvm_reverse_map_start(reverse_map);
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
if (reverse_map_index == 0)
block = va_block0;
else
block = va_block1;
TEST_CHECK_RET(reverse_map->va_block == block);
TEST_CHECK_RET(nv_kref_read(&block->kref) >= 2);
uvm_va_block_release(reverse_map->va_block);
TEST_CHECK_RET(num_reverse_map_pages == uvm_va_block_num_cpu_pages(block));
TEST_CHECK_RET(virt_addr == block->start);
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
num_pages += num_reverse_map_pages;
}
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block0) + uvm_va_block_num_cpu_pages(va_block1));
return NV_OK;
}
static const NvU64 g_base_dma_addr = UVM_VA_BLOCK_SIZE;
// This function adds the mappings for all the subregions in va_block defined
// by page_mask. g_base_dma_addr is used as the base DMA address for the whole
// VA block.
static NV_STATUS test_pmm_sysmem_reverse_map_single(uvm_va_block_t *va_block,
uvm_page_mask_t *page_mask,
uvm_chunk_size_t split_size,
bool merge)
{
NV_STATUS status = NV_OK;
uvm_va_block_region_t subregion;
TEST_CHECK_RET(is_power_of_2(split_size));
TEST_CHECK_RET(split_size >= PAGE_SIZE);
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
TEST_CHECK_RET(is_power_of_2(uvm_va_block_region_size(subregion)));
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
va_block->start + subregion.first * PAGE_SIZE,
uvm_va_block_region_size(subregion),
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
return status;
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
if (split_size != UVM_CHUNK_SIZE_MAX) {
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
TEST_CHECK_RET(uvm_va_block_region_size(subregion) > split_size);
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
split_size);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
}
if (split_size != UVM_CHUNK_SIZE_MAX && merge) {
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
uvm_va_block_region_size(subregion));
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
}
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
NvU64 subregion_dma_addr = g_base_dma_addr + subregion.first * PAGE_SIZE;
if (split_size == UVM_CHUNK_SIZE_MAX || merge) {
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr);
uvm_mutex_unlock(&va_block->lock);
}
else {
size_t chunk;
size_t num_chunks = uvm_va_block_region_size(subregion) / split_size;
TEST_CHECK_RET(num_chunks > 1);
uvm_mutex_lock(&va_block->lock);
for (chunk = 0; chunk < num_chunks; ++chunk)
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr + chunk * split_size);
uvm_mutex_unlock(&va_block->lock);
}
}
uvm_page_mask_zero(page_mask);
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
return status;
}
static uvm_page_mask_t g_page_mask;
static NV_STATUS test_pmm_sysmem_reverse_map_single_whole(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block;
const bool merge_array[] = {false, true};
const uvm_chunk_size_t chunk_split_array[] = { UVM_CHUNK_SIZE_4K, UVM_CHUNK_SIZE_64K, UVM_CHUNK_SIZE_MAX };
unsigned merge_index;
unsigned chunk_split_index;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
for (merge_index = 0; merge_index < ARRAY_SIZE(merge_array); ++merge_index) {
for (chunk_split_index = 0; chunk_split_index < ARRAY_SIZE(chunk_split_array); ++chunk_split_index) {
// The reverse map has PAGE_SIZE granularity
if (chunk_split_array[chunk_split_index] < PAGE_SIZE)
continue;
uvm_page_mask_region_fill(&g_page_mask, uvm_va_block_region_from_block(va_block));
TEST_CHECK_RET(test_pmm_sysmem_reverse_map_single(va_block,
&g_page_mask,
chunk_split_array[chunk_split_index],
merge_array[merge_index]) == NV_OK);
}
}
return status;
}
static NV_STATUS test_pmm_sysmem_reverse_map_single_pattern(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block;
uvm_page_index_t page_index;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
uvm_page_mask_zero(&g_page_mask);
for_each_va_block_page(page_index, va_block) {
if (page_index % 2 == 0)
uvm_page_mask_set(&g_page_mask, page_index);
}
return test_pmm_sysmem_reverse_map_single(va_block, &g_page_mask, UVM_CHUNK_SIZE_MAX, false);
}
// This function assumes that addr points at a VA range containing four VA
// blocks, each of size UVM_VA_BLOCK_SIZE / 4.
static NV_STATUS test_pmm_sysmem_reverse_map_many_blocks(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block0;
uvm_va_block_t *va_block1;
NvU64 base_dma_addr0;
NvU64 base_dma_addr1;
status = uvm_va_block_find(va_space, addr + UVM_VA_BLOCK_SIZE / 4, &va_block0);
if (status != NV_OK)
return status;
status = uvm_va_block_find(va_space, addr + 3 * UVM_VA_BLOCK_SIZE / 4, &va_block1);
if (status != NV_OK)
return status;
TEST_CHECK_RET(va_block0 != va_block1);
base_dma_addr0 = g_base_dma_addr + uvm_va_block_size(va_block0);
base_dma_addr1 = base_dma_addr0 + uvm_va_block_size(va_block0);
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block0)));
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block1)));
uvm_mutex_lock(&va_block0->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
base_dma_addr0,
va_block0->start,
uvm_va_block_size(va_block0),
va_block0,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block0->lock);
TEST_CHECK_RET(status == NV_OK);
uvm_mutex_lock(&va_block1->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
base_dma_addr1,
va_block1->start,
uvm_va_block_size(va_block1),
va_block1,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block1->lock);
// Check each VA block individually
if (status == NV_OK) {
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block0, base_dma_addr0, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block0, base_dma_addr0, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block1, base_dma_addr1, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block1, base_dma_addr1, NULL) == NV_OK, error);
// Check both VA blocks at the same time
TEST_CHECK_GOTO(check_reverse_map_two_blocks_batch(g_base_dma_addr, va_block0, va_block1) == NV_OK, error);
error:
uvm_mutex_lock(&va_block1->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr1);
uvm_mutex_unlock(&va_block1->lock);
}
uvm_mutex_lock(&va_block0->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr0);
uvm_mutex_unlock(&va_block0->lock);
return status;
}
// This function registers a non-uniform distribution of chunks (a mix of 4K
// and 64K chunks) and then merges them back to verify that the merge logic works.
static NV_STATUS test_pmm_sysmem_reverse_map_merge(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status = NV_OK;
uvm_va_block_t *va_block;
const unsigned chunks_64k_pos[] =
{
16,
64,
96,
192,
208,
224,
288,
320,
384,
480
};
uvm_page_index_t page_index;
unsigned i;
if (PAGE_SIZE != UVM_PAGE_SIZE_4K)
return NV_OK;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
page_index = 0;
for (i = 0; i < ARRAY_SIZE(chunks_64k_pos); ++i) {
// Fill with 4K mappings until the next 64K mapping
while (page_index < chunks_64k_pos[i]) {
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
PAGE_SIZE,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
++page_index;
}
// Register the 64K mapping
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
UVM_CHUNK_SIZE_64K,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
page_index += UVM_PAGE_SIZE_64K / PAGE_SIZE;
}
// Fill the tail with 4K mappings, too
while (page_index < PAGES_PER_UVM_VA_BLOCK) {
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
PAGE_SIZE,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
++page_index;
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
g_base_dma_addr,
uvm_va_block_size(va_block));
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
uvm_mutex_unlock(&va_block->lock);
return status;
}
static NV_STATUS test_pmm_sysmem_reverse_map_remove_on_eviction(uvm_va_space_t *va_space, NvU64 addr)
{
uvm_va_block_t *va_block;
NV_STATUS status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr,
addr,
uvm_va_block_size(va_block),
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
return NV_OK;
}
static NV_STATUS test_pmm_sysmem_reverse_map(uvm_va_space_t *va_space, NvU64 addr1, NvU64 addr2)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *volta_gpu = NULL;
uvm_gpu_t *gpu;
// Find a GPU that supports access counters with physical address
// notifications, since that is required to add entries to, or remove them
// from, the reverse map.
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent->access_counters_can_use_physical_addresses) {
// Initialize the reverse map.
status = uvm_pmm_sysmem_mappings_init(gpu, &g_reverse_map);
if (status != NV_OK)
return status;
volta_gpu = gpu;
break;
}
}
if (!volta_gpu)
return NV_ERR_INVALID_DEVICE;
status = test_pmm_sysmem_reverse_map_single_whole(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_single_pattern(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_many_blocks(va_space, addr2);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_merge(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_remove_on_eviction(va_space, addr1);
uvm_pmm_sysmem_mappings_deinit(&g_reverse_map);
return status;
}
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space;
va_space = uvm_va_space_get(filp);
// Take the global lock to avoid interference between different instances of
// the test, since we use a number of global variables
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_write(va_space);
status = test_pmm_sysmem_reverse_map(va_space, params->range_address1, params->range_address2);
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS cpu_chunk_map_on_cpu(uvm_cpu_chunk_t *chunk, void **cpu_addr)
{
struct page **pages;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -144,6 +144,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
}
if (gpu->parent->replayable_faults_supported) {
UVM_ASSERT(gpu->parent->isr.access_counters);
UVM_ASSERT(gpu->parent->access_counter_buffer);
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.replayable_faults,
gpu->parent->closest_cpu_numa_node);
@@ -161,10 +164,11 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
}
if (gpu->parent->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters,
// We only need to test one notification buffer, so we pick index 0.
uvm_access_counters_isr_lock(&gpu->parent->access_counter_buffer[0]);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters[0],
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_access_counters_isr_unlock(&gpu->parent->access_counter_buffer[0]);
}
}
@@ -311,7 +315,6 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DISABLE_NVLINK_PEER_ACCESS, uvm_test_disable_nvlink_peer_access);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_GET_PAGE_THRASHING_POLICY, uvm_test_get_page_thrashing_policy);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_PAGE_THRASHING_POLICY, uvm_test_set_page_thrashing_policy);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_SYSMEM, uvm_test_pmm_sysmem);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_REVERSE_MAP, uvm_test_pmm_reverse_map);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_MM_RETAIN, uvm_test_va_space_mm_retain);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE, uvm_test_pmm_chunk_with_elevated_page);
@@ -350,6 +353,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_TOOLS_EVENT_V2, uvm_test_inject_tools_event_v2);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_P2P_SUSPENDED, uvm_test_set_p2p_suspended);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_NVLINK_ERROR, uvm_test_inject_nvlink_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_QUERY_ACCESS_COUNTERS, uvm_test_query_access_counters);
}
return -EINVAL;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -28,8 +28,7 @@
#include "uvm_common.h"
#include "uvm_test_ioctl.h"
// Unlike UVM_INFO_PRINT, this prints on release builds
#define UVM_TEST_PRINT(fmt, ...) UVM_PRINT_FUNC(pr_info, " " fmt, ##__VA_ARGS__)
#define UVM_TEST_PRINT UVM_ERR_PRINT_ALWAYS
// WARNING: This macro will return out of the current scope
#define TEST_CHECK_RET(cond) \
@@ -160,30 +159,35 @@ NV_STATUS uvm_test_range_group_tree(UVM_TEST_RANGE_GROUP_TREE_PARAMS *params, st
NV_STATUS uvm_test_range_group_range_info(UVM_TEST_RANGE_GROUP_RANGE_INFO_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_range_group_range_count(UVM_TEST_RANGE_GROUP_RANGE_COUNT_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_check_channel_va_space(UVM_TEST_CHECK_CHANNEL_VA_SPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_thread_context_sanity(UVM_TEST_THREAD_CONTEXT_SANITY_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_thread_context_perf(UVM_TEST_THREAD_CONTEXT_PERF_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_rb_tree_directed(UVM_TEST_RB_TREE_DIRECTED_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_rb_tree_random(UVM_TEST_RB_TREE_RANDOM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_sec2_sanity(UVM_TEST_SEC2_SANITY_PARAMS *params, struct file *filp);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVidia Corporation
Copyright (c) 2015-2025 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -926,31 +926,38 @@ typedef struct
// Change configuration of access counters. This call will disable access
// counters and reenable them using the new configuration. All previous
// notifications will be lost
// notifications will be lost.
//
// The reconfiguration affects all VA spaces that rely on the access
// counters information for the same GPU. To avoid conflicting configurations,
// only one VA space is allowed to reconfigure the GPU at a time.
//
// When the reconfiguration VA space is destroyed, the bottom-half control
// settings are reset.
//
// Error returns:
// NV_ERR_INVALID_STATE
// - The GPU has already been reconfigured in a different VA space
// - The GPU has already been reconfigured in a different VA space.
#define UVM_TEST_RECONFIGURE_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(56)
typedef struct
{
NvProcessorUuid gpu_uuid; // In
// Type UVM_ACCESS_COUNTER_GRANULARITY from nv_uvm_types.h
NvU32 mimc_granularity; // In
NvU32 momc_granularity; // In
// Type UVM_ACCESS_COUNTER_USE_LIMIT from nv_uvm_types.h
NvU32 mimc_use_limit; // In
NvU32 momc_use_limit; // In
NvU32 granularity; // In
NvU32 threshold; // In
NvBool enable_mimc_migrations; // In
NvBool enable_momc_migrations; // In
NvBool enable_migrations; // In
// Settings to control how notifications are serviced by the access counters
// bottom-half. These settings help tests exercise races in the driver, e.g.,
// unregistering a GPU while (valid) pending notifications remain in the
// notification buffer.
//
// A max_batch_size of 0 leaves the driver's behavior unchanged.
NvU32 max_batch_size; // In
NvBool one_iteration_per_batch; // In
NvU32 sleep_per_iteration_us; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS;
@@ -962,13 +969,6 @@ typedef enum
UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX
} UVM_TEST_ACCESS_COUNTER_RESET_MODE;
typedef enum
{
UVM_TEST_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_TEST_ACCESS_COUNTER_TYPE_MOMC,
UVM_TEST_ACCESS_COUNTER_TYPE_MAX
} UVM_TEST_ACCESS_COUNTER_TYPE;
// Clear the contents of the access counters. This call supports different
// modes for targeted/global resets.
#define UVM_TEST_RESET_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(57)
@@ -979,9 +979,6 @@ typedef struct
// Type UVM_TEST_ACCESS_COUNTER_RESET_MODE
NvU32 mode; // In
// Type UVM_TEST_ACCESS_COUNTER_TYPE
NvU32 counter_type; // In
NvU32 bank; // In
NvU32 tag; // In
NV_STATUS rmStatus; // Out
@@ -1061,14 +1058,6 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_SET_PAGE_THRASHING_POLICY_PARAMS;
#define UVM_TEST_PMM_SYSMEM UVM_TEST_IOCTL_BASE(64)
typedef struct
{
NvU64 range_address1 NV_ALIGN_BYTES(8); // In
NvU64 range_address2 NV_ALIGN_BYTES(8); // In
NV_STATUS rmStatus; // Out
} UVM_TEST_PMM_SYSMEM_PARAMS;
#define UVM_TEST_PMM_REVERSE_MAP UVM_TEST_IOCTL_BASE(65)
typedef struct
{
@@ -1142,18 +1131,46 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS;
// Inject an error into the VA space
// Inject an error into the VA space or into a to-be registered GPU.
//
// If migrate_vma_allocation_fail_nth is greater than 0, the nth page
// allocation within migrate_vma will fail.
//
// If va_block_allocation_fail_nth is greater than 0, the nth call to
// uvm_va_block_find_create() will fail with NV_ERR_NO_MEMORY.
//
// If gpu_access_counters_alloc_buffer is set, the parent_gpu's access counters
// buffer allocation will fail with NV_ERR_NO_MEMORY.
//
// If gpu_access_counters_alloc_block_context is set, the access counters
// buffer's block_context allocation will fail with NV_ERR_NO_MEMORY.
//
// If gpu_isr_access_counters_alloc is set, the ISR access counters allocation
// will fail with NV_ERR_NO_MEMORY.
//
// If gpu_isr_access_counters_alloc_stats_cpu is set, the ISR access counters
// buffer's stats_cpu allocation will fail with NV_ERR_NO_MEMORY.
//
// If access_counters_batch_context_notifications is set, the access counters
// batch_context's notifications allocation will fail with NV_ERR_NO_MEMORY.
//
// If access_counters_batch_context_notification_cache is set, the access
// counters batch_context's notification cache allocation will fail with
// NV_ERR_NO_MEMORY.
//
// Note that only one of the gpu_* or access_counters_* settings can be
// selected at a time.
#define UVM_TEST_VA_SPACE_INJECT_ERROR UVM_TEST_IOCTL_BASE(72)
typedef struct
{
NvU32 migrate_vma_allocation_fail_nth; // In
NvU32 va_block_allocation_fail_nth; // In
NvBool gpu_access_counters_alloc_buffer; // In
NvBool gpu_access_counters_alloc_block_context; // In
NvBool gpu_isr_access_counters_alloc; // In
NvBool gpu_isr_access_counters_alloc_stats_cpu; // In
NvBool access_counters_batch_context_notifications; // In
NvBool access_counters_batch_context_notification_cache; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS;
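A hypothetical user-space sketch of driving this ioctl, assuming the driver was loaded with built-in tests enabled, that /dev/nvidia-uvm is the device node, and that the usual UVM test calling convention (the params struct passed by pointer) applies; it is illustrative only and depends on the driver's own headers.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "uvm_test_ioctl.h" /* assumption: built against the driver's headers */

int main(void)
{
    UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS params;
    int fd = open("/dev/nvidia-uvm", O_RDWR); /* assumed device node */

    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&params, 0, sizeof(params));

    /* Per the comment above, select only one injection point at a time:
     * here, make the access counters buffer allocation fail on the next
     * GPU registration. */
    params.gpu_access_counters_alloc_buffer = 1;

    if (ioctl(fd, UVM_TEST_VA_SPACE_INJECT_ERROR, &params) != 0)
        perror("ioctl");
    else
        printf("rmStatus = 0x%x\n", (unsigned)params.rmStatus);

    close(fd);
    return 0;
}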
@@ -1505,6 +1522,16 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_INJECT_NVLINK_ERROR_PARAMS;
#define UVM_TEST_QUERY_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(109)
typedef struct
{
NvProcessorUuid gpu_uuid; // In
NvU8 num_notification_buffers; // Out
NvU32 num_notification_entries; // Out
NV_STATUS rmStatus; // Out
} UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS;
#ifdef __cplusplus
}
#endif

View File

@@ -1305,8 +1305,7 @@ void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_c
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_gpu_id_t gpu_id,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys)
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
uvm_down_read(&va_space->tools.lock);
@@ -1318,18 +1317,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
info->eventType = UvmEventTypeTestAccessCounter;
info->srcIndex = uvm_parent_id_value_from_processor_id(gpu_id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
info->veId = buffer_entry->virtual_info.ve_id;
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->physOnManaged = on_managed_phys? 1 : 0;
info->address = buffer_entry->address;
info->instancePtr = buffer_entry->instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
info->veId = buffer_entry->ve_id;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;
@@ -1345,18 +1336,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
info->eventType = UvmEventTypeTestAccessCounter;
info->srcIndex = uvm_id_value(gpu_id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
info->veId = buffer_entry->virtual_info.ve_id;
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->physOnManaged = on_managed_phys? 1 : 0;
info->address = buffer_entry->address;
info->instancePtr = buffer_entry->instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
info->veId = buffer_entry->ve_id;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;
@@ -1368,18 +1351,13 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_up_read(&va_space->tools.lock);
}
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys)
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry)
{
uvm_va_space_t *va_space;
uvm_down_read(&g_tools_va_space_list_lock);
list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
uvm_tools_record_access_counter(va_space,
gpu->id,
buffer_entry,
on_managed_phys);
uvm_tools_record_access_counter(va_space, gpu->id, buffer_entry);
}
uvm_up_read(&g_tools_va_space_list_lock);
}

View File

@@ -111,14 +111,11 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_gpu_id_t gpu_id,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys);
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -37,10 +37,10 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_turing_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Turing covers 128 TB and that's the minimum
@@ -79,8 +79,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -25,42 +25,174 @@
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "clc365.h"
static void clear_access_counter_notifications_interrupt(uvm_parent_gpu_t *parent_gpu)
typedef struct {
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
} access_counter_buffer_entry_c365_t;
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntr;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
reg = access_counters->rm_info.pHubIntrEnSet;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
static void clear_access_counter_notifications_interrupt(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
reg = access_counters->rm_info.pHubIntr;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = access_counters->rm_info.pHubIntrEnClear;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
wmb();
// See the comment in uvm_hal_turing_disable_replayable_faults
clear_access_counter_notifications_interrupt(parent_gpu);
clear_access_counter_notifications_interrupt(access_counters);
}
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get)
{
clear_access_counter_notifications_interrupt(parent_gpu);
clear_access_counter_notifications_interrupt(access_counters);
wmb();
// Write GET to force the re-evaluation of the interrupt condition after the
// interrupt bit has been cleared.
UVM_GPU_WRITE_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferGet, get);
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
{
return NVC365_NOTIFY_BUF_SIZE;
}
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static NvU64 get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
{
NvU64 address;
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
UVM_ASSERT(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
return address;
}
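A stand-alone sketch of the address assembly performed by get_address() above, using an assumed 32-bit ADDR_LO field width and an assumed 49-bit virtual address space for the sign-extension that uvm_parent_gpu_canonical_address() performs on the real GPU. The instance pointer parsed a few functions below is assembled the same way from INST_LO/INST_HI and then shifted left by 12, since the hardware reports a 4K page number.

#include <stdio.h>
#include <stdint.h>

/* Assumed widths for illustration only; the real values come from clc365.h
 * (HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO)) and from the GPU's VA width. */
#define SKETCH_ADDR_LO_BITS 32
#define SKETCH_VA_BITS      49

static uint64_t sketch_canonical(uint64_t addr)
{
    /* Sign-extend from the top implemented VA bit (assumes the usual
     * two's-complement arithmetic right shift). */
    unsigned shift = 64 - SKETCH_VA_BITS;
    return (uint64_t)(((int64_t)(addr << shift)) >> shift);
}

int main(void)
{
    /* Hypothetical raw fields read out of a notification buffer entry */
    uint64_t addr_hi = 0x0001FFFFULL;
    uint64_t addr_lo = 0xFEEDB000ULL;

    uint64_t address = addr_lo + (addr_hi << SKETCH_ADDR_LO_BITS);

    printf("assembled 0x%016llx -> canonical 0x%016llx\n",
           (unsigned long long)address,
           (unsigned long long)sketch_canonical(address));

    return 0;
}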
static NvU32 *get_access_counter_buffer_entry(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
access_counter_buffer_entry_c365_t *buffer_start;
NvU32 *access_counter_entry;
UVM_ASSERT(index < access_counters->max_notifications);
buffer_start = (access_counter_buffer_entry_c365_t *)access_counters->rm_info.bufferAddress;
access_counter_entry = (NvU32 *)&buffer_start[index];
return access_counter_entry;
}
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
NvU32 *access_counter_entry;
bool is_valid;
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
return is_valid;
}
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
NvU32 *access_counter_entry;
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 *access_counter_entry;
NvU64 inst_hi, inst_lo;
// Valid bit must be set before this function is called
UVM_ASSERT(uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, index));
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
UVM_ASSERT(READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE) != NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU);
buffer_entry->address = get_address(access_counters->parent_gpu, access_counter_entry);
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
buffer_entry->instance_ptr.address = inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
// HW value contains the 4K page number. Shift to build the full address
buffer_entry->instance_ptr.address <<= 12;
buffer_entry->instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
buffer_entry->mmu_engine_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
buffer_entry->mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
// MMU engine id aligns with the fault buffer packets. Therefore, we reuse
// the helper to compute the VE ID from the fault buffer class.
buffer_entry->ve_id = access_counters->parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->mmu_engine_id,
buffer_entry->mmu_engine_type);
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
// Automatically clear valid bit for the entry in the access counter buffer
uvm_hal_turing_access_counter_buffer_entry_clear_valid(access_counters, index);
}
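A hedged sketch of the consumption pattern these helpers support; the real polling loop lives in common code and is simplified here, and the function name below is illustrative.
/* Illustration only: poll one slot, parse it (parsing clears the valid bit,
 * per the comment above), and leave the decoded entry ready for servicing. */
static void example_drain_one(uvm_access_counter_buffer_t *access_counters,
                              NvU32 index,
                              uvm_access_counter_buffer_entry_t *entry)
{
    if (!uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, index))
        return;
    uvm_hal_turing_access_counter_buffer_parse_entry(access_counters, index, entry);
    /* entry->address, entry->counter_value, entry->sub_granularity, etc. are
     * now populated for the servicing path. */
}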

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -32,8 +32,8 @@ static void clear_replayable_faults_interrupt(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntr;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntr;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
@@ -54,8 +54,8 @@ void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);

View File

@@ -361,3 +361,24 @@ void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push)
{
NV_PUSH_4U(C46F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
}
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
NV_PUSH_4U(C46F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, HWVALUE(C46F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) |
HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
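For context, a hedged usage sketch (not part of the change): clear_all wipes every counter, while a targeted clear reuses the tag and bank captured when the notification was parsed. The wrapper name below is illustrative.
/* Illustration only: push a targeted clear for a serviced notification. */
static void example_clear_serviced(uvm_push_t *push,
                                   const uvm_access_counter_buffer_entry_t *entry)
{
    uvm_hal_turing_access_counter_clear_targeted(push, entry);
}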

View File

@@ -1323,14 +1323,11 @@ typedef struct
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 padding8bits;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
@@ -1348,26 +1345,21 @@ typedef struct
// data in a queue.
//
NvU8 eventType;
// See uvm_access_counter_buffer_entry_t for details
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU16 srcIndex; // index of the gpu that received the access counter
// notification
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
NvU32 tag;
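The manual-padding note above is the kind of invariant that can be checked at build time. A minimal sketch under assumed names follows; this is not the actual tools struct, only an illustration of why explicit padding pins the offsets.
#include <stddef.h>
#include <stdint.h>
/* Every member sits on its natural boundary, so pragma pack or -malign-double
 * cannot move it; the offsets are fixed by construction. */
struct example_event {
    uint8_t  event_type;    /* offset 0 */
    uint8_t  aperture;      /* offset 1 */
    uint8_t  ve_id;         /* offset 2 */
    uint8_t  padding8bits;  /* offset 3, brings src_index to a 2-byte boundary */
    uint16_t src_index;     /* offset 4 */
    uint16_t padding16bits; /* offset 6, brings value to a 4-byte boundary */
    uint32_t value;         /* offset 8 */
};
_Static_assert(offsetof(struct example_event, value) == 8, "unexpected layout");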

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -664,10 +664,11 @@ static void uvm_va_block_cpu_clear_resident_region(uvm_va_block_t *va_block, int
block_update_cpu_resident_mask(va_block);
}
// Clear residency bits from any/all processors that might have had pages resident.
// Note that both the destination processor and any CPU NUMA nodes where pages are
// migrating to need to be skipped as the block logic sets the new page residency
// before clearing the old ones (see uvm_va_block_make_resident_finish()).
// Clear residency bits from any/all processors that might have had pages
// resident. Note that both the destination processor and any CPU NUMA nodes
// where pages are migrating to need to be skipped as the block logic sets the
// new page residency before clearing the old ones
// (see uvm_va_block_make_resident_finish()).
static void uvm_va_block_cpu_clear_resident_all_chunks(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *page_mask)
@@ -1328,40 +1329,18 @@ static void cpu_chunk_remove_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_
if (gpu_mapping_addr == 0)
return;
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
}
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
uvm_va_block_t *block,
uvm_page_index_t page_index,
uvm_gpu_t *gpu)
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_chunk_size_t chunk_size;
// When the Confidential Computing feature is enabled the transfers don't
// use the DMA mapping of CPU chunks (since it's protected memory), but
// the DMA address of the unprotected dma buffer.
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = uvm_cpu_chunk_map_gpu(chunk, gpu);
if (status != NV_OK)
return status;
chunk_size = uvm_cpu_chunk_get_size(chunk);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
uvm_va_block_cpu_page_address(block, page_index),
chunk_size,
block,
UVM_ID_CPU);
if (status != NV_OK)
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
return status;
return uvm_cpu_chunk_map_gpu(chunk, gpu);
}
static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t *gpu)
@@ -1393,7 +1372,7 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
uvm_id_value(gpu->id),
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu));
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, page_index, gpu);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
if (status != NV_OK)
goto error;
}
@@ -1468,14 +1447,10 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
}
}
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index)
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t *chunk)
{
NV_STATUS status;
uvm_gpu_id_t id;
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block, chunk_size, page_index);
// We can't iterate over va_space->registered_gpus because we might be
// on the eviction path, which does not have the VA space lock held. We have
@@ -1489,7 +1464,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
continue;
gpu = uvm_gpu_get(id);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, chunk_region.first, gpu);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
if (status != NV_OK)
goto error;
}
@@ -1756,7 +1731,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
// before mapping.
chunk_ptr = split_chunks[i];
split_chunks[i] = NULL;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
if (status != NV_OK)
goto done;
}
@@ -1793,7 +1768,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
// before mapping.
chunk_ptr = small_chunks[j];
small_chunks[j] = NULL;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
if (status != NV_OK)
goto done;
}
@@ -1860,7 +1835,7 @@ static NV_STATUS block_add_cpu_chunk(uvm_va_block_t *block,
if (status != NV_OK)
goto out;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(block, uvm_cpu_chunk_get_numa_node(chunk), page_index);
goto out;
@@ -3155,8 +3130,8 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block,
uvm_page_mask_or(pages_staged, pages_staged, scratch_page_mask);
}
// 2. Remove any pages in pages_staged that are on any resident processor
// dest_id can copy from.
// 2. Remove any pages in pages_staged that are on any resident
// processor dest_id can copy from.
if (uvm_processor_mask_and(tmp_processor_mask, can_copy_from_processors, &block->resident)) {
for_each_id_in_mask(id, tmp_processor_mask) {
id_resident_mask = uvm_va_block_resident_mask_get(block, id, NUMA_NO_NODE);
@@ -3210,14 +3185,21 @@ static uvm_gpu_chunk_t *block_phys_page_chunk(uvm_va_block_t *block, block_phys_
return chunk;
}
typedef enum {
REMOTE_EGM_ALLOWED = 0,
REMOTE_EGM_NOT_ALLOWED = 1,
} remote_egm_mode_t;
// Get the physical GPU address of a block's page from the POV of the specified
// GPU. This is the address that should be used for making PTEs for the
// specified GPU.
static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
block_phys_page_t block_page,
uvm_gpu_t *gpu)
uvm_gpu_t *gpu,
remote_egm_mode_t egm_mode)
{
uvm_va_block_gpu_state_t *accessing_gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
bool allow_remote_egm = egm_mode == REMOTE_EGM_ALLOWED;
size_t chunk_offset;
uvm_gpu_chunk_t *chunk;
@@ -3231,7 +3213,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_parent_gpu_t *routing_gpu = uvm_va_space_get_egm_routing_gpu(va_space, gpu, block_page.nid);
if (routing_gpu) {
if (routing_gpu && (allow_remote_egm || routing_gpu == gpu->parent)) {
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);
phys_addr = page_to_phys(page);
@@ -3296,9 +3278,14 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
// CPU and local GPU accesses can rely on block_phys_page_address, but the
// resulting physical address may need to be converted into virtual.
if (UVM_ID_IS_CPU(block_page.processor) || uvm_id_equal(block_page.processor, gpu->id)) {
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu);
// Do not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_NOT_ALLOWED);
// EGM mappings use physical addresses with a PEER aperture.
if (uvm_aperture_is_peer(phys_addr.aperture)) {
UVM_ASSERT(block_check_egm_peer(uvm_va_block_get_va_space(block), gpu, block_page.nid, phys_addr));
return uvm_gpu_address_from_phys(phys_addr);
@@ -3334,7 +3321,7 @@ uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_blo
UVM_ASSERT(nid != NUMA_NO_NODE);
}
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu);
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu, REMOTE_EGM_ALLOWED);
}
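A hedged summary of how the new remote_egm_mode_t flag is used: internal copy paths pass REMOTE_EGM_NOT_ALLOWED until STO handling covers EGM, while PTE-writing and reporting paths pass REMOTE_EGM_ALLOWED. The helper below is illustrative only and mirrors the check inside block_phys_page_address().
/* Illustration only: decide whether an EGM routing GPU may be used. */
static bool example_can_use_egm_route(remote_egm_mode_t egm_mode,
                                      const uvm_parent_gpu_t *routing_gpu,
                                      const uvm_parent_gpu_t *local_parent)
{
    bool allow_remote_egm = (egm_mode == REMOTE_EGM_ALLOWED);
    return routing_gpu && (allow_remote_egm || routing_gpu == local_parent);
}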
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
@@ -3949,9 +3936,9 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));
// CPU-to-CPU copies using memcpy() don't have any inherent ordering with
// copies using GPU CEs. So, we have to make sure that all previously
// submitted work is complete.
// CPU-to-CPU copies using memcpy() don't have any inherent ordering
// with copies using GPU CEs. So, we have to make sure that all
// previously submitted work is complete.
status = uvm_tracker_wait(&va_block->tracker);
if (status != NV_OK)
return status;
@@ -4204,9 +4191,9 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_processor_mask_set(&block_context->make_resident.all_involved_processors, copying_gpu->id);
// This function is called just once per VA block and needs to
// receive the "main" cause for the migration (it mainly checks if
// we are in the eviction path). Therefore, we pass cause instead
// of contig_cause
// receive the "main" cause for the migration (it mainly checks
// if we are in the eviction path). Therefore, we pass cause
// instead of contig_cause.
uvm_tools_record_block_migration_begin(block,
&push,
dst_id,
@@ -4233,8 +4220,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
contig_cause = page_cause;
if (block_copy_should_use_push(block, &copy_state)) {
// When CC is enabled, transfers between GPU and CPU don't rely on
// any GPU mapping of CPU chunks, physical or virtual.
// When CC is enabled, transfers between GPU and CPU don't rely
// on any GPU mapping of CPU chunks, physical or virtual.
if (UVM_ID_IS_CPU(src_id) && g_uvm_global.conf_computing_enabled)
can_cache_src_phys_addr = false;
@@ -4244,8 +4231,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// Computing the physical address is a non-trivial operation and
// seems to be a performance limiter on systems with 2 or more
// NVLINK links. Therefore, for physically-contiguous block
// storage, we cache the start address and compute the page address
// using the page index.
// storage, we cache the start address and compute the page
// address using the page index.
if (can_cache_src_phys_addr) {
copy_state.src.gpu_address = block_phys_page_copy_address(block,
block_phys_page(src_id,
@@ -5187,12 +5174,13 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
if (!scratch_residency_mask)
return NV_ERR_NO_MEMORY;
// We cannot read-duplicate on different CPU NUMA nodes since there is only one
// CPU page table. So, the page has to migrate from the source NUMA node to the
// destination one.
// We cannot read-duplicate on different CPU NUMA nodes since there is only
// one CPU page table. So, the page has to migrate from the source NUMA node
// to the destination one.
// In order to correctly map pages on the destination NUMA node, all pages
// resident on other NUMA nodes have to be unmapped. Otherwise, their WRITE
// permission will be revoked but they'll remain mapped on the source NUMA node.
// permission will be revoked but they'll remain mapped on the source NUMA
// node.
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
UVM_ID_IS_CPU(va_block_context->make_resident.dest_id)) {
uvm_page_mask_t *dest_nid_resident = uvm_va_block_resident_mask_get(va_block,
@@ -5623,7 +5611,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
}
// atomic mappings from GPUs with disabled system-wide atomics are treated
// as write mappings. Therefore, we remove them from the atomic mappings mask
// as write mappings. Therefore, we remove them from the atomic mappings
// mask
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
if (!uvm_processor_mask_empty(read_mappings)) {
@@ -5696,7 +5685,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
*residency_has_native_atomics->bitmap,
*va_space->system_wide_atomics_enabled_processors.bitmap);
// Only one processor outside of the native group can have atomics enabled
// Only one processor outside of the native group can have atomics
// enabled
UVM_ASSERT_MSG(uvm_processor_mask_get_count(atomic_mappings) == 1,
"Too many atomics mappings to %s from processors with non-native atomics\n"
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx -"
@@ -5714,9 +5704,9 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
non_native_atomics = &mapping_masks->non_native_atomics;
// One or more processors within the native group have atomics enabled.
// All processors outside of that group may have write but not atomic
// permissions.
// One or more processors within the native group have atomics
// enabled. All processors outside of that group may have write but
// not atomic permissions.
uvm_processor_mask_andnot(non_native_atomics, atomic_mappings, residency_has_native_atomics);
UVM_ASSERT_MSG(uvm_processor_mask_empty(non_native_atomics),
@@ -6143,7 +6133,10 @@ static void block_gpu_pte_write_4k(uvm_va_block_t *block,
if (page_index >= contig_region.outer || nid != contig_nid) {
contig_region = block_phys_contig_region(block, page_index, resident_id, nid);
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
contig_addr = block_phys_page_address(block,
block_phys_page(resident_id, nid, contig_region.first),
gpu,
REMOTE_EGM_ALLOWED);
page_addr = contig_addr;
contig_nid = nid;
}
@@ -6368,7 +6361,10 @@ static void block_gpu_pte_write_big(uvm_va_block_t *block,
if (big_region.first >= contig_region.outer || nid != contig_nid) {
contig_region = block_phys_contig_region(block, big_region.first, resident_id, nid);
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
contig_addr = block_phys_page_address(block,
block_phys_page(resident_id, nid, contig_region.first),
gpu,
REMOTE_EGM_ALLOWED);
page_addr = contig_addr;
contig_nid = nid;
}
@@ -6520,7 +6516,7 @@ static void block_gpu_pte_write_2m(uvm_va_block_t *block,
block_mark_cpu_page_dirty(block, 0, nid);
}
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu);
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu, REMOTE_EGM_ALLOWED);
pte_val = tree->hal->make_pte(page_addr.aperture, page_addr.address, new_prot, pte_flags);
uvm_pte_batch_write_pte(pte_batch, pte_addr, pte_val, pte_size);
@@ -10037,16 +10033,8 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
uvm_chunk_size_t new_size;
uvm_gpu_t *gpu;
NvU64 gpu_mapping_addr;
uvm_processor_mask_t *gpu_split_mask;
uvm_gpu_id_t id;
NV_STATUS status;
gpu_split_mask = uvm_processor_mask_cache_alloc();
if (!gpu_split_mask)
return NV_ERR_NO_MEMORY;
if (chunk_size == UVM_CHUNK_SIZE_2M)
new_size = UVM_CHUNK_SIZE_64K;
else
@@ -10054,45 +10042,11 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
UVM_ASSERT(IS_ALIGNED(chunk_size, new_size));
uvm_processor_mask_zero(gpu_split_mask);
for_each_gpu_id(id) {
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu = uvm_gpu_get(id);
// If the parent chunk has not been mapped, there is nothing to split.
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
new_size);
if (status != NV_OK)
goto merge;
uvm_processor_mask_set(gpu_split_mask, id);
}
if (new_size == UVM_CHUNK_SIZE_64K)
status = block_split_cpu_chunk_to_64k(block, nid);
else
status = block_split_cpu_chunk_to_4k(block, page_index, nid);
if (status != NV_OK) {
merge:
for_each_gpu_id_in_mask(id, gpu_split_mask) {
gpu = uvm_gpu_get(id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
chunk_size);
}
}
uvm_processor_mask_cache_free(gpu_split_mask);
return status;
}
@@ -10109,8 +10063,8 @@ static NV_STATUS block_prealloc_cpu_chunk_storage(uvm_va_block_t *existing, uvm_
UVM_ASSERT(uvm_cpu_storage_get_type(node_state) == UVM_CPU_CHUNK_STORAGE_MIXED);
existing_mixed = uvm_cpu_storage_get_ptr(node_state);
// Pre-allocate chunk storage for the new block. By definition, the new block
// will contain either 64K and/or 4K chunks.
// Pre-allocate chunk storage for the new block. By definition, the new
// block will contain either 64K and/or 4K chunks.
//
// We do this here so there are no failures in block_split_cpu().
new_mixed = uvm_kvmalloc_zero(sizeof(*new_mixed));
@@ -10182,8 +10136,8 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
for_each_possible_uvm_node(nid) {
splitting_chunk = uvm_cpu_chunk_get_chunk_for_page(existing, nid, page_index);
// If the page covering the split point has not been populated, there is no
// need to split.
// If the page covering the split point has not been populated, there is
// no need to split.
if (!splitting_chunk)
continue;
@@ -10247,7 +10201,6 @@ static void block_merge_cpu_chunks_to_2m(uvm_va_block_t *block, uvm_page_index_t
static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t page_index, int nid)
{
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
uvm_gpu_id_t id;
if (!chunk)
return;
@@ -10259,25 +10212,6 @@ static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t p
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_64K);
block_merge_cpu_chunks_to_2m(block, page_index, nid);
}
chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
for_each_gpu_id(id) {
NvU64 gpu_mapping_addr;
uvm_gpu_t *gpu;
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu = uvm_gpu_get(id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
uvm_cpu_chunk_get_size(chunk));
}
}
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
@@ -10695,9 +10629,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
size_t new_pages = uvm_va_block_num_cpu_pages(new);
size_t existing_pages, existing_pages_4k, existing_pages_big, new_pages_big;
uvm_pte_bits_gpu_t pte_bit;
uvm_cpu_chunk_t *cpu_chunk;
uvm_page_index_t page_index;
int nid;
if (!existing_gpu_state)
return;
@@ -10711,14 +10642,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
UVM_ASSERT(PAGE_ALIGNED(existing->start));
existing_pages = (new->start - existing->start) / PAGE_SIZE;
for_each_possible_uvm_node(nid) {
for_each_cpu_chunk_in_block(cpu_chunk, page_index, new, nid) {
uvm_pmm_sysmem_mappings_reparent_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_phys_addr(cpu_chunk, gpu),
new);
}
}
block_copy_split_gpu_chunks(existing, new, gpu);
block_split_page_mask(&existing_gpu_state->resident,
@@ -10727,8 +10650,10 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
new_pages);
for (pte_bit = 0; pte_bit < UVM_PTE_BITS_GPU_MAX; pte_bit++) {
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit], existing_pages,
&new_gpu_state->pte_bits[pte_bit], new_pages);
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit],
existing_pages,
&new_gpu_state->pte_bits[pte_bit],
new_pages);
}
// Adjust page table ranges.
@@ -11113,7 +11038,8 @@ static NV_STATUS do_block_add_mappings_after_migration(uvm_va_block_t *va_block,
bool map_processor_has_enabled_system_wide_atomics =
uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, map_processor_id);
// Write mappings from processors with disabled system-wide atomics are treated like atomics
// Write mappings from processors with disabled system-wide atomics are
// treated like atomics
if (new_map_prot == UVM_PROT_READ_WRITE && !map_processor_has_enabled_system_wide_atomics)
final_map_prot = UVM_PROT_READ_WRITE_ATOMIC;
else
@@ -11346,14 +11272,17 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE_ATOMIC, atomic_mappings);
// Exclude processors with system-wide atomics disabled from atomic_mappings
// Exclude processors with system-wide atomics disabled from
// atomic_mappings
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
// Exclude the processor for which the mapping protections are being computed
// Exclude the processor for which the mapping protections are being
// computed
uvm_processor_mask_clear(atomic_mappings, processor_id);
// If there is any processor with atomic mapping, check if it has native atomics to the processor
// with the resident copy. If it does not, we can only map READ ONLY
// If there is any processor with atomic mapping, check if it has native
// atomics to the processor with the resident copy. If it does not, we
// can only map READ ONLY
atomic_id = uvm_processor_mask_find_first_id(atomic_mappings);
if (UVM_ID_IS_VALID(atomic_id) &&
!uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], atomic_id)) {
@@ -11364,7 +11293,8 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE, write_mappings);
// Exclude the processor for which the mapping protections are being computed
// Exclude the processor for which the mapping protections are being
// computed
uvm_processor_mask_clear(write_mappings, processor_id);
// At this point, any processor with atomic mappings either has native
@@ -11639,31 +11569,32 @@ static uvm_processor_id_t block_select_processor_residency(uvm_va_block_t *va_bl
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
return preferred_location;
// Check if we should map the closest resident processor remotely on remote CPU fault
// Check if we should map the closest resident processor remotely on remote
// CPU fault
//
// When faulting on CPU, there's a linux process on behalf of it, which is associated
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
// (local) fault, and we may want to migrate a page from GPU to CPU.
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
// VM, we might preserve residence.
// When faulting on CPU, there's a linux process on behalf of it, which is
// associated with a unique VM pointed by current->mm. A block of memory
// residing on GPU is also associated with VM, pointed by
// va_block_context->mm. If they match, it's a regular (local) fault, and we
// may want to migrate a page from GPU to CPU. If it's a 'remote' fault,
// i.e., linux process differs from one associated with block VM, we might
// preserve residence.
//
// Establishing a remote fault without access counters means the memory could stay in
// the wrong spot for a long time, which is why we prefer to avoid creating remote
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
// in place for NIC accesses.
// Establishing a remote fault without access counters means the memory
// could stay in the wrong spot for a long time, which is why we prefer to
// avoid creating remote mappings. However, when a NIC accesses memory
// residing on the GPU, it's worth keeping it in place for NIC accesses.
//
// The logic that's used to detect remote faulting also keeps memory in place for
// ptrace accesses. We would prefer to control those policies separately, but the
// NIC case takes priority.
// If the accessing processor is CPU, we're either handling a fault
// from other than owning process, or we're handling an MOMC
// notification. Only prevent migration for the former.
// The logic that's used to detect remote faulting also keeps memory in
// place for ptrace accesses. We would prefer to control those policies
// separately, but the NIC case takes priority. If the accessing processor
// is the CPU and we're handling a fault from a process other than the
// owning one, we want to prevent a migration.
if (UVM_ID_IS_CPU(processor_id) &&
operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
va_block_context->mm != current->mm) {
UVM_ASSERT(va_block_context->mm != NULL);
UVM_ASSERT(operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS);
return closest_resident_processor;
}
@@ -11693,7 +11624,8 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// For HMM allocations UVM doesn't always control allocation of the
// destination page as the kernel may have already allocated one. Therefore
// we can't respect the preferred node ID for HMM pages.
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when making a HMM page resident
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when
// making a HMM page resident
if (uvm_va_block_is_hmm(va_block))
return NUMA_NO_NODE;
@@ -11867,9 +11799,12 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
break;
case UVM_SERVICE_OPERATION_ACCESS_COUNTERS:
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
service_context->block_context->make_resident.access_counters_buffer_index =
service_context->access_counters_buffer_index;
break;
default:
UVM_ASSERT_MSG(false, "Invalid operation value %d\n", service_context->operation);
// Set cause to silence compiler warning that it may be unused.
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
break;
@@ -11955,16 +11890,21 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
if (status != NV_OK)
return status;
// TODO: Bug 5069427: [uvm] Fix the migration STO error checks.
// Same as above for nvlink errors. Check the source GPU as well
// as all its peers.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_in_mask(peer_gpu, &gpu->peer_info.peer_gpu_mask) {
status = uvm_gpu_check_nvlink_error_no_rm(peer_gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
uvm_processor_mask_set(&service_context->gpus_to_check_for_nvlink_errors, peer_gpu->id);
if (status != NV_OK)
if (status != NV_OK) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return status;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
status = uvm_gpu_check_nvlink_error_no_rm(gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
@@ -13542,7 +13482,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
}
else {
params->resident_physical_address[count] =
block_phys_page_address(block, block_page, uvm_gpu_get(id)).address;
block_phys_page_address(block, block_page, uvm_gpu_get(id), REMOTE_EGM_ALLOWED).address;
}
++count;
@@ -13572,7 +13512,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
block_page = block_phys_page(processor_to_map, nid, page_index);
if (!UVM_ID_IS_CPU(id)) {
uvm_gpu_t *gpu = uvm_gpu_get(id);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_ALLOWED);
NvU64 phys_addr = gpu_phys_addr.address;
if (UVM_ID_IS_CPU(block_page.processor)) {

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -205,12 +205,12 @@ typedef struct
//
// The indices represent the corresponding big PTEs in the block's interior.
// For example, a block with alignment and size of one 4k page on either
// side of a big page will only use bit 0. Use uvm_va_block_big_page_index to look
// the big_ptes index of a page.
// side of a big page will only use bit 0. Use uvm_va_block_big_page_index
// to look up the big_ptes index of a page.
//
// The block might not be able to fit any big PTEs, in which case this
// bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the number of
// valid bits in this mask.
// bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the
// number of valid bits in this mask.
DECLARE_BITMAP(big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
// See the comments for uvm_va_block_mmap_t::cpu.pte_bits.
@@ -565,8 +565,8 @@ struct uvm_va_block_wrapper_struct
// testing only.
bool inject_eviction_error;
// Force the next successful chunk allocation to then fail. Used for testing
// only to simulate driver metadata allocation failure.
// Force the next successful chunk allocation to then fail. Used for
// testing only to simulate driver metadata allocation failure.
bool inject_populate_error;
// Force the next split on this block to fail.
@@ -1250,8 +1250,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// context.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
@@ -1282,8 +1282,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// pages to new_residency.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
@@ -1311,8 +1311,8 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
// to the new residency (which may be remote).
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
@@ -1499,8 +1499,8 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
//
// service_context and service_context->block_context must not be NULL and
// policy for the region must match. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If
@@ -1550,7 +1550,8 @@ void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
// Frees all the remaining free chunks and unpins all the used chunks.
void uvm_va_block_retry_deinit(uvm_va_block_retry_t *uvm_va_block_retry, uvm_va_block_t *va_block);
// Evict all chunks from the block that are subchunks of the passed in root_chunk.
// Evict all chunks from the block that are subchunks of the passed in
// root_chunk.
//
// Add all the work tracking the eviction to the tracker.
//
@@ -2139,16 +2140,12 @@ struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_
struct page *uvm_va_block_get_cpu_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index);
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk);
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
@@ -2163,8 +2160,7 @@ NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_page_index_t page_index);
// Get CPU page size or 0 if it is not mapped
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index);
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Get GPU page size or 0 if it is not mapped on the given GPU
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
@@ -2262,8 +2258,8 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
// otherwise it will be initialized and deinitialized by the macro.
//
// The macro also locks and unlocks the block's lock internally as it's expected
// that the block's lock has been unlocked and relocked whenever the function call
// returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// that the block's lock has been unlocked and relocked whenever the function
// call returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// block's state is not locked across these calls.
#define UVM_VA_BLOCK_LOCK_RETRY(va_block, block_retry, call) ({ \
NV_STATUS __status; \

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -235,6 +235,10 @@ typedef struct
// Event that triggered the call
uvm_make_resident_cause_t cause;
// Access counters notification buffer index. Only valid when cause is
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER.
NvU32 access_counters_buffer_index;
} make_resident;
// State used by the mapping APIs (unmap, map, revoke). This could be used

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -558,7 +558,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
if (gpu->parent->access_counters_supported)
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
}
@@ -576,7 +576,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_deferred_free_object_list(&deferred_free_list);
// Normally we'd expect this to happen as part of uvm_mm_release()
// Normally we'd expect this to happen as part of uvm_release_mm()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
@@ -760,7 +760,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
bool gpu_can_access_sysmem = true;
uvm_processor_mask_t *peers_to_release = NULL;
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &gpu);
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &va_space->test.parent_gpu_error, &gpu);
if (status != NV_OK)
return status;
@@ -936,7 +936,7 @@ done:
// registered GPU: the enablement step would have failed before even
// discovering that the GPU is already registered.
if (uvm_parent_gpu_access_counters_required(gpu->parent))
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_gpu_release(gpu);
}
@@ -1011,7 +1011,7 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
// acquires the VA space lock after the unregistration does. Both outcomes
// result on valid states.
if (disable_access_counters)
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
// mmap_lock is needed to establish CPU mappings to any pages evicted from
// the GPU if accessed by CPU is set for them.
@@ -2207,6 +2207,17 @@ NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *
atomic_set(&va_space->test.migrate_vma_allocation_fail_nth, params->migrate_vma_allocation_fail_nth);
atomic_set(&va_space->test.va_block_allocation_fail_nth, params->va_block_allocation_fail_nth);
va_space->test.parent_gpu_error.access_counters_alloc_buffer = params->gpu_access_counters_alloc_buffer;
va_space->test.parent_gpu_error.access_counters_alloc_block_context =
params->gpu_access_counters_alloc_block_context;
va_space->test.parent_gpu_error.access_counters_batch_context_notifications =
params->access_counters_batch_context_notifications;
va_space->test.parent_gpu_error.access_counters_batch_context_notification_cache =
params->access_counters_batch_context_notification_cache;
va_space->test.parent_gpu_error.isr_access_counters_alloc = params->gpu_isr_access_counters_alloc;
va_space->test.parent_gpu_error.isr_access_counters_alloc_stats_cpu =
params->gpu_isr_access_counters_alloc_stats_cpu;
return NV_OK;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -424,6 +424,8 @@ struct uvm_va_space_struct
bool force_cpu_to_cpu_copy_with_ce;
bool allow_allocation_from_movable;
uvm_test_parent_gpu_inject_error_t parent_gpu_error;
} test;
// Queue item for deferred f_ops->release() handling

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -37,10 +37,10 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_volta_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Volta covers 128 TB and that's the minimum
@@ -82,9 +82,9 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = true;
// Although access counters are supported in HW, it only notifies memory
// accesses using physical addresses, which is not supported in SW.
parent_gpu->access_counters_supported = false;
parent_gpu->fault_cancel_va_supported = true;

View File

@@ -1,228 +0,0 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "clc365.h"
#include "uvm_volta_fault_buffer.h"
typedef struct {
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
} access_counter_buffer_entry_c365_t;
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnSet;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
// No-op, this function is only used by pulse-based interrupt GPUs.
}
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
{
return NVC365_NOTIFY_BUF_SIZE;
}
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static uvm_aperture_t get_access_counter_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, APERTURE);
NvU32 peer_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, PEER_ID);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM:
return UVM_APERTURE_PEER(peer_id);
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static uvm_gpu_address_t get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
{
NvU64 address;
bool is_virtual;
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
is_virtual = (addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
if (is_virtual) {
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
return uvm_gpu_address_virtual(address);
}
else {
uvm_aperture_t aperture = get_access_counter_aperture(access_counter_entry);
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
UVM_ASSERT_MSG(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA,
"Invalid address type%u\n", addr_type_value);
return uvm_gpu_address_physical(aperture, address);
}
}
static uvm_access_counter_type_t get_access_counter_type(NvU32 *access_counter_entry)
{
NvU32 type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE);
if (type_value == NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU)
return UVM_ACCESS_COUNTER_TYPE_MOMC;
else
return UVM_ACCESS_COUNTER_TYPE_MIMC;
}
static NvU32 *get_access_counter_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
access_counter_buffer_entry_c365_t *buffer_start;
NvU32 *access_counter_entry;
UVM_ASSERT(index < parent_gpu->access_counter_buffer_info.max_notifications);
buffer_start = (access_counter_buffer_entry_c365_t *)parent_gpu->access_counter_buffer_info.rm_info.bufferAddress;
access_counter_entry = (NvU32 *)&buffer_start[index];
return access_counter_entry;
}
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *access_counter_entry;
bool is_valid;
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
return is_valid;
}
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *access_counter_entry;
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 *access_counter_entry;
// Valid bit must be set before this function is called
UVM_ASSERT(uvm_hal_volta_access_counter_buffer_entry_is_valid(parent_gpu, index));
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
buffer_entry->counter_type = get_access_counter_type(access_counter_entry);
buffer_entry->address = get_address(parent_gpu, access_counter_entry);
if (buffer_entry->address.is_virtual) {
NvU64 inst_hi, inst_lo;
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
buffer_entry->virtual_info.instance_ptr.address =
inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
// HW value contains the 4K page number. Shift to build the full address
buffer_entry->virtual_info.instance_ptr.address <<= 12;
buffer_entry->virtual_info.instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
buffer_entry->virtual_info.mmu_engine_id =
READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
buffer_entry->virtual_info.mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
// MMU engine id aligns with the fault buffer packets. Therefore, we
// reuse the helper to compute the VE ID from the fault buffer class.
buffer_entry->virtual_info.ve_id =
parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->virtual_info.mmu_engine_id,
buffer_entry->virtual_info.mmu_engine_type);
}
else if (buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
// Ignore any set bit beyond 47 since it is the maximum physical address
// supported by the GPU. See the definition of
// uvm_gpu_t::dma_addressable_start for why higher bits might be set.
const NvU64 mask_46_0 = (0x1UL << 47) - 1;
buffer_entry->address.address &= mask_46_0;
}
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
// Automatically clear valid bit for the entry in the access counter buffer
uvm_hal_volta_access_counter_buffer_entry_clear_valid(parent_gpu, index);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,7 +38,7 @@ typedef struct {
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
NvU32 index = READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, PTR);
UVM_ASSERT(READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, GETPTR_CORRUPTED) ==
NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO);
@@ -48,8 +48,8 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
return READ_HWVALUE(get, _PFB_PRI_MMU, FAULT_BUFFER_GET, PTR);
}
@@ -58,7 +58,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
{
NvU32 get = HWVALUE(_PFB_PRI_MMU, FAULT_BUFFER_GET, PTR, index);
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
// If HW has detected an overflow condition (PUT == GET - 1 and a fault has
// arrived, which is dropped due to no more space in the fault buffer), it
@@ -70,7 +70,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
// resulting in the overflow condition being instantly reasserted. However,
// if the index is updated first and then the OVERFLOW bit is cleared such
// a collision will not cause a reassertion of the overflow condition.
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
// Clearing GETPTR_CORRUPTED and OVERFLOW is not needed when GSP-RM owns
// the HW replayable fault buffer, because UVM does not write to the actual
@@ -82,7 +82,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
// Clear the GETPTR_CORRUPTED and OVERFLOW bits.
get |= HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, GETPTR_CORRUPTED, CLEAR) |
HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, OVERFLOW, CLEAR);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
}
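
The ordering argument in the comment above can be shown with a hedged sketch (not UVM code): publish the new GET index first, then clear the sticky error bits in a second write, so a racing fault cannot observe the stale index and immediately reassert OVERFLOW. The bit positions and the register stand-in below are made-up assumptions.

#include <stdint.h>
#include <stdio.h>

#define DEMO_GET_PTR_MASK      0x000FFFFFu  /* assumed index field */
#define DEMO_GETPTR_CORRUPTED  (1u << 30)   /* assumed write-1-to-clear bit */
#define DEMO_OVERFLOW          (1u << 31)   /* assumed write-1-to-clear bit */

static volatile uint32_t demo_get_reg;      /* stand-in for the HW register */

static void demo_write_get(uint32_t index)
{
    /* Step 1: publish the new index so the producer sees free space. */
    demo_get_reg = index & DEMO_GET_PTR_MASK;

    /* Step 2: clear the sticky error bits in a separate write. */
    demo_get_reg = (index & DEMO_GET_PTR_MASK) | DEMO_GETPTR_CORRUPTED | DEMO_OVERFLOW;
}

int main(void)
{
    demo_write_get(42u);
    printf("get register = 0x%08x\n", (unsigned)demo_get_reg);
    return 0;
}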
// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
@@ -234,9 +234,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
fault_buffer_entry_c369_t *buffer_start;
NvU32 *fault_entry;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
fault_entry = (NvU32 *)&buffer_start[index];
return fault_entry;
@@ -247,10 +247,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
{
UvmFaultMetadataPacket *fault_entry_metadata;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
UVM_ASSERT(fault_entry_metadata != NULL);
return fault_entry_metadata + index;
@@ -359,7 +359,7 @@ static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
UVM_ASSERT(gpc_utlb_id < parent_gpu->utlb_per_gpc_count);
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
buffer_entry->fault_source.utlb_id = utlb_id;
}
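
A hedged sketch (not UVM code) of the index flattening used above: a (GPC id, per-GPC uTLB id) pair collapses into one global uTLB index so a single array can track per-uTLB fault state. The counts in the example are illustrative.

#include <assert.h>
#include <stdio.h>

static unsigned demo_flatten_utlb_id(unsigned gpc_id, unsigned gpc_utlb_id,
                                     unsigned utlb_per_gpc_count)
{
    assert(gpc_utlb_id < utlb_per_gpc_count);
    return gpc_id * utlb_per_gpc_count + gpc_utlb_id;
}

int main(void)
{
    /* GPC 3, uTLB 1 of 2 per GPC -> global index 7. */
    printf("%u\n", demo_flatten_utlb_id(3u, 1u, 2u));
    return 0;
}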

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -136,64 +136,6 @@ void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
clear_type_value);
}
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push)
{
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
}
static NvU32 get_access_counter_type_value(uvm_access_counter_type_t type)
{
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC;
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC;
else
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
return 0;
}
static NvU32 get_access_counter_targeted_type_value(uvm_access_counter_type_t type)
{
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC;
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC;
else
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
return 0;
}
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type)
{
NvU32 type_value = get_access_counter_type_value(type);
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, type_value));
}
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 targeted_type_value = get_access_counter_targeted_type_value(buffer_entry->counter_type);
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, HWVALUE(C36F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, targeted_type_value) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
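
As a hedged illustration of how payload words such as MEM_OP_D are built in the removed helpers above, the sketch below ORs fields shifted into assumed bit ranges. The macros and positions are invented for the example; the real encodings come from the class headers (for example the C36F definitions), not from this sketch.

#include <stdint.h>
#include <stdio.h>

/* Place a value into an assumed (shift, width) field of a 32-bit word. */
#define DEMO_FIELD(value, shift, width) \
    (((uint32_t)(value) & ((1u << (width)) - 1u)) << (shift))

#define DEMO_OP_ACCESS_COUNTER_CLR  DEMO_FIELD(0x6u, 27, 5)  /* assumed opcode   */
#define DEMO_CLR_TYPE_TARGETED      DEMO_FIELD(0x3u, 20, 2)  /* assumed clr type */

static uint32_t demo_clear_targeted_payload(uint32_t targeted_type, uint32_t bank)
{
    return DEMO_OP_ACCESS_COUNTER_CLR |
           DEMO_CLR_TYPE_TARGETED |
           DEMO_FIELD(targeted_type, 18, 2) |   /* MIMC vs. MOMC, assumed field  */
           DEMO_FIELD(bank, 12, 4);             /* notification bank, assumed    */
}

int main(void)
{
    printf("0x%08x\n", demo_clear_targeted_payload(0u, 2u));
    return 0;
}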
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,