550.40.07

Bernhard Stoeckner
2024-01-24 17:51:53 +01:00
parent bb2dac1f20
commit 91676d6628
1411 changed files with 261367 additions and 145959 deletions


@@ -1,5 +1,5 @@
/*******************************************************************************
-Copyright (c) 2015-2021 NVIDIA Corporation
+Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -40,13 +40,13 @@ struct uvm_global_struct
// Note that GPUs are added to this mask as the last step of add_gpu() and
// removed from it as the first step of remove_gpu() implying that a GPU
// that's being initialized or deinitialized will not be in it.
-uvm_global_processor_mask_t retained_gpus;
+uvm_processor_mask_t retained_gpus;
// Array of the parent GPUs registered with UVM. Note that GPUs will have
-// ids offset by 1 to accomodate the UVM_GLOBAL_ID_CPU so e.g.
-// parent_gpus[0] will have GPU id = 1. A GPU entry is unused iff it does
-// not exist (is a NULL pointer) in this table.
-uvm_parent_gpu_t *parent_gpus[UVM_MAX_GPUS];
+// ids offset by 1 to accomodate the UVM_ID_CPU so e.g., parent_gpus[0]
+// will have GPU id = 1. A GPU entry is unused iff it does not exist
+// (is a NULL pointer) in this table.
+uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];
// A global RM session (RM client)
// Created on module load and destroyed on module unload
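The offset-by-one id scheme described for parent_gpus above reduces to index = id - 1, with processor id 0 reserved for the CPU. A minimal standalone sketch of that mapping; the constants and the helper body are illustrative assumptions, not the driver's definitions:

    #include <assert.h>

    #define UVM_ID_CPU             0   /* processor id 0 is the CPU */
    #define UVM_PARENT_ID_MAX_GPUS 32  /* illustrative size, not the real value */

    /* GPU ids start at 1, so the table index is simply id - 1. */
    static unsigned parent_id_gpu_index(unsigned parent_gpu_id)
    {
        assert(parent_gpu_id != UVM_ID_CPU);
        assert(parent_gpu_id <= UVM_PARENT_ID_MAX_GPUS);
        return parent_gpu_id - 1;
    }

    int main(void)
    {
        assert(parent_id_gpu_index(1) == 0); /* parent_gpus[0] holds GPU id 1 */
        return 0;
    }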
@@ -143,11 +143,16 @@ struct uvm_global_struct
struct page *page;
} unload_state;
-// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
-// enabled. This field is set once during global initialization
-// (uvm_global_init), and can be read afterwards without acquiring any
-// locks.
-bool sev_enabled;
+// True if the VM has AMD's SEV, or equivalent HW security extensions such
+// as Intel's TDX, enabled. The flag is always false on the host.
+//
+// This value moves in tandem with that of Confidential Computing in the
+// GPU(s) in all supported configurations, so it is used as a proxy for the
+// Confidential Computing state.
+//
+// This field is set once during global initialization (uvm_global_init),
+// and can be read afterwards without acquiring any locks.
+bool conf_computing_enabled;
};
// Initialize global uvm state
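The locking comment carried over to conf_computing_enabled describes a set-once, read-lock-free pattern: the flag is written exactly once while initialization is still single-threaded, so later readers need no synchronization. A small userspace sketch of that pattern, with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    static bool conf_computing_enabled; /* written exactly once, during init */

    /* Runs before any reader thread exists, so the store needs no lock. */
    static void global_init(bool sev_or_tdx_active)
    {
        conf_computing_enabled = sev_or_tdx_active;
    }

    /* After init the value never changes, so lock-free reads are safe. */
    static bool confidential_computing_active(void)
    {
        return conf_computing_enabled;
    }

    int main(void)
    {
        global_init(true);
        printf("conf computing: %d\n", confidential_computing_active());
        return 0;
    }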
@@ -167,7 +172,7 @@ NV_STATUS uvm_resume_entry(void);
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
-NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
+NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
@@ -181,7 +186,7 @@ static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
-NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
+NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
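Both uvm_global_add_parent_gpu() and uvm_global_remove_parent_gpu() assert, rather than acquire, the locks: the caller must already hold the global mutex and the gpu_table spinlock. A userspace analogue of that convention, using an owner-tracking wrapper as a stand-in for uvm_assert_mutex_locked() (the wrapper is an assumption, not the driver's implementation):

    #include <assert.h>
    #include <pthread.h>
    #include <stdbool.h>

    typedef struct {
        pthread_mutex_t mutex;
        bool held;   /* tracked so callees can assert the caller's contract */
    } checked_mutex_t;

    static checked_mutex_t g_global_lock = { PTHREAD_MUTEX_INITIALIZER, false };

    static void checked_lock(checked_mutex_t *m)   { pthread_mutex_lock(&m->mutex); m->held = true; }
    static void checked_unlock(checked_mutex_t *m) { m->held = false; pthread_mutex_unlock(&m->mutex); }

    /* The callee verifies the locking contract instead of locking itself. */
    static void add_parent_gpu(void)
    {
        assert(g_global_lock.held);
        /* ... update the shared GPU table ... */
    }

    int main(void)
    {
        checked_lock(&g_global_lock);
        add_parent_gpu();
        checked_unlock(&g_global_lock);
        return 0;
    }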
@@ -196,41 +201,25 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
//
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
// retained at least one of the child GPUs.
-static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
+static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
{
-return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
+return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
}
-// Get a gpu by its global id.
+// Get a gpu by its GPU id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
-static uvm_gpu_t *uvm_gpu_get(uvm_global_gpu_id_t global_gpu_id)
+static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
{
uvm_parent_gpu_t *parent_gpu;
-parent_gpu = g_uvm_global.parent_gpus[uvm_id_gpu_index_from_global_gpu_id(global_gpu_id)];
+parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
if (!parent_gpu)
return NULL;
-return parent_gpu->gpus[uvm_global_id_sub_processor_index(global_gpu_id)];
-}
-// Get a gpu by its processor id.
-// Returns a pointer to the GPU object, or NULL if not found.
-//
-// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
-// retained the gpu.
-static uvm_gpu_t *uvm_gpu_get_by_processor_id(uvm_processor_id_t id)
-{
-uvm_global_gpu_id_t global_id = uvm_global_gpu_id_from_gpu_id(id);
-uvm_gpu_t *gpu = uvm_gpu_get(global_id);
-if (gpu)
-UVM_ASSERT(!gpu->parent->smc.enabled);
-return gpu;
+return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
}
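With the rename, uvm_gpu_get() takes a plain uvm_gpu_id_t and splits it into a parent-table index plus a sub-processor (SMC partition) slot, which also makes the removed uvm_gpu_get_by_processor_id() wrapper unnecessary. A standalone model of that two-level lookup; the exact id packing here is an assumption for illustration:

    #include <assert.h>
    #include <stddef.h>

    #define MAX_PARENT_GPUS    8
    #define MAX_SUB_PROCESSORS 8

    typedef struct { int dummy; } gpu_t;
    typedef struct { gpu_t *gpus[MAX_SUB_PROCESSORS]; } parent_gpu_t;

    static parent_gpu_t *parent_gpus[MAX_PARENT_GPUS];

    /* Assumed packing: id 0 is the CPU; GPU ids 1..N map to
     * (parent index, sub-processor slot) in row-major order. */
    static gpu_t *gpu_get(unsigned gpu_id)
    {
        parent_gpu_t *parent = parent_gpus[(gpu_id - 1) / MAX_SUB_PROCESSORS];

        if (!parent)
            return NULL;   /* parent not registered */
        return parent->gpus[(gpu_id - 1) % MAX_SUB_PROCESSORS];
    }

    int main(void)
    {
        assert(gpu_get(1) == NULL); /* empty table: lookup fails cleanly */
        return 0;
    }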
static uvmGpuSessionHandle uvm_global_session_handle(void)
@@ -287,56 +276,57 @@ static NV_STATUS uvm_global_get_status(void)
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);
-static uvm_gpu_t *uvm_global_processor_mask_find_first_gpu(const uvm_global_processor_mask_t *global_gpus)
+static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
-uvm_global_gpu_id_t gpu_id = uvm_global_processor_mask_find_first_gpu_id(global_gpus);
+uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);
-if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
+if (UVM_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// If there is valid GPU id in the mask, assert that the corresponding
// uvm_gpu_t is present. Otherwise it would stop a
-// for_each_global_gpu_in_mask() loop pre-maturely. Today, this could only
+// for_each_gpu_in_mask() loop pre-maturely. Today, this could only
// happen in remove_gpu() because the GPU being removed is deleted from the
// global table very early.
-UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
+UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));
return gpu;
}
-static uvm_gpu_t *__uvm_global_processor_mask_find_next_gpu(const uvm_global_processor_mask_t *global_gpus, uvm_gpu_t *gpu)
+static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
-uvm_global_gpu_id_t gpu_id;
+uvm_gpu_id_t gpu_id;
UVM_ASSERT(gpu);
-gpu_id = uvm_global_processor_mask_find_next_id(global_gpus, uvm_global_gpu_id_next(gpu->global_id));
-if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
+gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
+if (UVM_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
-// See comment in uvm_global_processor_mask_find_first_gpu().
-UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
+// See comment in uvm_processor_mask_find_first_gpu().
+UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));
return gpu;
}
// Helper to iterate over all GPUs in the input mask
-#define for_each_global_gpu_in_mask(gpu, global_mask) \
-for (gpu = uvm_global_processor_mask_find_first_gpu(global_mask); \
-gpu != NULL; \
-gpu = __uvm_global_processor_mask_find_next_gpu(global_mask, gpu))
+#define for_each_gpu_in_mask(gpu, mask) \
+for (gpu = uvm_processor_mask_find_first_gpu(mask); \
+gpu != NULL; \
+gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
-// Helper to iterate over all GPUs retained by the UVM driver (across all va spaces)
-#define for_each_global_gpu(gpu) \
-for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
-gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
-gpu != NULL; \
-gpu = __uvm_global_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
+// Helper to iterate over all GPUs retained by the UVM driver
+// (across all va spaces).
+#define for_each_gpu(gpu) \
+for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
+gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
+gpu != NULL; \
+gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
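The renamed find-first/find-next helpers and the for_each wrappers implement a conventional bitmask walk that terminates on an invalid id; the UVM_ASSERT_MSG() above exists because a NULL result from uvm_gpu_get() would otherwise end the walk early while bits remain set. A self-contained sketch of the same shape over an unsigned long mask (not the driver's mask type):

    #include <stdio.h>

    /* First set bit strictly after 'prev', or -1 when none remains. */
    static int mask_find_next(unsigned long mask, int prev)
    {
        for (int i = prev + 1; i < (int)(8 * sizeof mask); i++) {
            if (mask & (1UL << i))
                return i;
        }
        return -1; /* invalid id: terminates the loop below */
    }

    #define for_each_id_in_mask(id, mask)       \
        for ((id) = mask_find_next((mask), -1); \
             (id) >= 0;                         \
             (id) = mask_find_next((mask), (id)))

    int main(void)
    {
        unsigned long retained = (1UL << 1) | (1UL << 3) | (1UL << 4);
        int id;

        for_each_id_in_mask(id, retained)
            printf("gpu id %d\n", id); /* prints 1, 3, 4 */
        return 0;
    }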
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -344,7 +334,7 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
NvU32 i;
if (parent_gpu) {
-NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
+NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
i = gpu_index + 1;
}
else {
@@ -353,7 +343,7 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
parent_gpu = NULL;
-while (i < UVM_MAX_GPUS) {
+while (i < UVM_PARENT_ID_MAX_GPUS) {
if (g_uvm_global.parent_gpus[i]) {
parent_gpu = g_uvm_global.parent_gpus[i];
break;
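uvm_global_find_next_parent_gpu() resumes one slot past the current parent GPU and returns the next non-NULL table entry, or NULL when the table is exhausted. A reduced standalone version of that scan; the index of cur, which the driver recovers via uvm_parent_id_gpu_index(), is passed explicitly here:

    #include <stddef.h>

    typedef struct { int id; } parent_gpu_t;

    static parent_gpu_t *find_next_parent_gpu(parent_gpu_t **table, unsigned n,
                                              const parent_gpu_t *cur, unsigned cur_idx)
    {
        /* Start at slot 0, or one past the current entry. */
        for (unsigned i = cur ? cur_idx + 1 : 0; i < n; i++) {
            if (table[i])
                return table[i];
        }
        return NULL;
    }

    int main(void)
    {
        parent_gpu_t a = { 1 }, b = { 3 };
        parent_gpu_t *table[4] = { NULL, &a, NULL, &b };

        /* The walk visits slot 1, then slot 3, then stops. */
        return find_next_parent_gpu(table, 4, NULL, 0) == &a ? 0 : 1;
    }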
@@ -369,18 +359,18 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
uvm_gpu_t *gpu = NULL;
-uvm_global_gpu_id_t global_gpu_id;
+uvm_gpu_id_t gpu_id;
NvU32 sub_processor_index;
NvU32 cur_sub_processor_index;
UVM_ASSERT(parent_gpu);
-global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
-cur_sub_processor_index = cur_gpu ? uvm_global_id_sub_processor_index(cur_gpu->global_id) : -1;
+gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
+cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;
-sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
-if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
-gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
+sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
+if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
+gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);
}
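The sub-processor walk relies on the kernel's find_next_bit(), which returns the bitmap size when no further bit is set. A userspace stand-in with the same contract, iterating the live slots of a hypothetical valid_gpus bitmap:

    #include <stdio.h>

    #define MAX_SUB_PROCESSORS 8

    /* First set bit at or after 'start', or 'size' when none remains,
     * mirroring the kernel helper's return convention. */
    static unsigned find_next_bit_ul(unsigned long word, unsigned size, unsigned start)
    {
        for (unsigned i = start; i < size; i++) {
            if (word & (1UL << i))
                return i;
        }
        return size;
    }

    int main(void)
    {
        unsigned long valid_gpus = 0x0D; /* slots 0, 2 and 3 are live */

        for (unsigned sub = find_next_bit_ul(valid_gpus, MAX_SUB_PROCESSORS, 0);
             sub < MAX_SUB_PROCESSORS;
             sub = find_next_bit_ul(valid_gpus, MAX_SUB_PROCESSORS, sub + 1))
            printf("sub-processor %u\n", sub);
        return 0;
    }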
@@ -400,18 +390,18 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
(gpu) != NULL; \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
-// Helper which calls uvm_gpu_retain on each GPU in mask
-void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask);
+// Helper which calls uvm_gpu_retain() on each GPU in mask.
+void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);
// Helper which calls uvm_gpu_release_locked on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
// is not empty
-void uvm_global_mask_release(const uvm_global_processor_mask_t *mask);
+void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
// Check for ECC errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
-NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus);
+NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
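A plausible call sequence for the three renamed helpers, retaining every GPU in a mask across an operation that calls into RM; the surrounding function and its error handling are invented for illustration:

    /* Hypothetical caller; only the three helper declarations are real. */
    static NV_STATUS check_retained_gpus(uvm_processor_mask_t *gpus)
    {
        NV_STATUS status;

        uvm_global_gpu_retain(gpus);    /* pin each GPU in the mask */

        /* Safe point to call into RM while every GPU is retained. */
        status = uvm_global_gpu_check_ecc_error(gpus);

        uvm_global_gpu_release(gpus);   /* may take the global lock */
        return status;
    }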
// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);