545.23.06

This commit is contained in:
Andy Ritger
2023-10-17 09:25:29 -07:00
parent f59818b751
commit b5bf85a8e3
917 changed files with 132480 additions and 110015 deletions

View File

@@ -1180,7 +1180,11 @@ static void mark_fault_fatal(uvm_fault_service_batch_context_t *batch_context,
fault_entry->replayable.cancel_va_mode = cancel_va_mode;
utlb->has_fatal_faults = true;
batch_context->has_fatal_faults = true;
if (!batch_context->fatal_va_space) {
UVM_ASSERT(fault_entry->va_space);
batch_context->fatal_va_space = fault_entry->va_space;
}
}
static void fault_entry_duplicate_flags(uvm_fault_service_batch_context_t *batch_context,
@@ -1230,7 +1234,7 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
UvmEventFatalReason fatal_reason;
uvm_fault_cancel_va_mode_t cancel_va_mode;
uvm_fault_access_type_t ret = UVM_FAULT_ACCESS_TYPE_COUNT;
uvm_va_block_context_t *va_block_context = &service_block_context->block_context;
uvm_va_block_context_t *va_block_context = service_block_context->block_context;
perm_status = uvm_va_block_check_logical_permissions(va_block,
va_block_context,
@@ -1345,7 +1349,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
if (uvm_va_block_is_hmm(va_block)) {
policy = uvm_hmm_find_policy_end(va_block,
block_context->block_context.hmm.vma,
block_context->block_context->hmm.vma,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
}
@@ -1469,7 +1473,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
&block_context->block_context,
block_context->block_context,
page_index,
gpu->id,
service_access_type_mask,
@@ -1511,8 +1515,8 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
++block_context->num_retries;
if (status == NV_OK && batch_context->has_fatal_faults)
status = uvm_va_block_set_cancel(va_block, &block_context->block_context, gpu);
if (status == NV_OK && batch_context->fatal_va_space)
status = uvm_va_block_set_cancel(va_block, block_context->block_context, gpu);
return status;
}
@@ -1860,7 +1864,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;
@@ -1937,14 +1941,198 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
return status;
}
// Called when a fault in the batch has been marked fatal. Flush the buffer
// under the VA and mmap locks to remove any potential stale fatal faults, then
// service all new faults for just that VA space and cancel those which are
// fatal. Faults in other VA spaces are replayed when done and will be processed
// when normal fault servicing resumes.
static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
NV_STATUS status = NV_OK;
NvU32 i;
uvm_va_space_t *va_space = batch_context->fatal_va_space;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
UVM_ASSERT(va_space);
// Perform the flush and re-fetch while holding the mmap_lock and the
// VA space lock. This avoids stale faults because it prevents any vma
// modifications (mmap, munmap, mprotect) from happening between the time HW
// takes the fault and we cancel it.
mm = uvm_va_space_mm_retain_lock(va_space);
uvm_va_block_context_init(va_block_context, mm);
uvm_va_space_down_read(va_space);
// We saw fatal faults in this VA space before. Flush while holding
// mmap_lock to make sure those faults come back (aren't stale).
//
// We need to wait until all old fault messages have arrived before
// flushing, hence UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT.
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
if (status != NV_OK)
goto done;
// Wait for the flush's replay to finish to give the legitimate faults a
// chance to show up in the buffer again.
status = uvm_tracker_wait(&replayable_faults->replay_tracker);
if (status != NV_OK)
goto done;
// We expect all replayed faults to have arrived in the buffer so we can re-
// service them. The replay-and-wait sequence above will ensure they're all
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
//
// TODO: Bug 2533557: This flush does not actually guarantee that GSP will
// copy over all faults.
status = hw_fault_buffer_flush_locked(gpu->parent);
if (status != NV_OK)
goto done;
// If there is no GPU VA space for the GPU, ignore all faults in the VA
// space. This can happen if the GPU VA space has been destroyed since we
// unlocked the VA space in service_fault_batch. That means the fatal faults
// are stale, because unregistering the GPU VA space requires preempting the
// context and detaching all channels in that VA space. Restart fault
// servicing from the top.
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (!gpu_va_space)
goto done;
// Re-parse the new faults
batch_context->num_invalid_prefetch_faults = 0;
batch_context->num_duplicate_faults = 0;
batch_context->num_replays = 0;
batch_context->fatal_va_space = NULL;
batch_context->has_throttled_faults = false;
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
if (status != NV_OK)
goto done;
// No more faults left. Either the previously-seen fatal entry was stale, or
// RM killed the context underneath us.
if (batch_context->num_cached_faults == 0)
goto done;
++batch_context->batch_id;
status = preprocess_fault_batch(gpu, batch_context);
if (status != NV_OK) {
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
// Another flush happened due to stale faults or a context-fatal
// error. The previously-seen fatal fault might not exist anymore,
// so restart fault servicing from the top.
status = NV_OK;
}
goto done;
}
// Search for the target VA space
for (i = 0; i < batch_context->num_coalesced_faults; i++) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
UVM_ASSERT(current_entry->va_space);
if (current_entry->va_space == va_space)
break;
}
while (i < batch_context->num_coalesced_faults) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
if (current_entry->va_space != va_space)
break;
// service_fault_batch_dispatch() doesn't expect unserviceable faults.
// Just cancel them directly.
if (current_entry->is_fatal) {
status = cancel_fault_precise_va(gpu, current_entry, UVM_FAULT_CANCEL_VA_MODE_ALL);
if (status != NV_OK)
break;
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
NvU32 block_faults;
ats_invalidate->write_faults_in_batch = false;
uvm_hmm_service_context_init(service_context);
// Service all the faults that we can. We only really need to search
// for fatal faults, but attempting to service all is the easiest
// way to do that.
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
if (status != NV_OK) {
// TODO: Bug 3900733: clean up locking in service_fault_batch().
// We need to drop lock and retry. That means flushing and
// starting over.
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
status = NV_OK;
break;
}
// Invalidate TLBs before cancel to ensure that fatal faults don't
// get stuck in HW behind non-fatal faults to the same line.
status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
if (status != NV_OK)
break;
while (block_faults-- > 0) {
current_entry = batch_context->ordered_fault_cache[i];
if (current_entry->is_fatal) {
status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
if (status != NV_OK)
break;
}
++i;
}
}
}
done:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
if (status == NV_OK) {
// There are two reasons to flush the fault buffer here.
//
// 1) Functional. We need to replay both the serviced non-fatal faults
// and the skipped faults in other VA spaces. The former need to be
// restarted and the latter need to be replayed so the normal fault
// service mechanism can fetch and process them.
//
// 2) Performance. After cancelling the fatal faults, a flush removes
// any potential duplicated fault that may have been added while
// processing the faults in this batch. This flush also avoids doing
// unnecessary processing after the fatal faults have been cancelled,
// so all the rest are unlikely to remain after a replay because the
// context is probably in the process of dying.
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
}
return status;
}
// Scan the ordered view of faults and group them by different va_blocks
// (managed faults) and service faults for each va_block, in batch.
// Service non-managed faults one at a time as they are encountered during the
// scan.
//
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
// space
// Fatal faults are marked for later processing by the caller.
static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
fault_service_mode_t service_mode,
uvm_fault_service_batch_context_t *batch_context)
@@ -1959,7 +2147,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
uvm_service_block_context_t *service_context =
&gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = &service_context->block_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -1995,41 +2183,28 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
va_block_context->mm = mm;
uvm_va_block_context_init(va_block_context, mm);
uvm_va_space_down_read(va_space);
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
if (status == NV_OK)
status = NV_WARN_MORE_PROCESSING_REQUIRED;
break;
}
// The case where there is no valid GPU VA space for the GPU in this
// VA space is handled next
}
// Some faults could be already fatal if they cannot be handled by
// the UVM driver
if (current_entry->is_fatal) {
++i;
batch_context->has_fatal_faults = true;
if (!batch_context->fatal_va_space)
batch_context->fatal_va_space = va_space;
utlb->has_fatal_faults = true;
UVM_ASSERT(utlb->num_pending_faults > 0);
continue;
}
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
if (!gpu_va_space) {
// If there is no GPU VA space for the GPU, ignore the fault. This
// can happen if a GPU VA space is destroyed without explicitly
// freeing all memory ranges (destroying the VA range triggers a
// flush of the fault buffer) and there are stale entries in the
// freeing all memory ranges and there are stale entries in the
// buffer that got fixed by the servicing in a previous batch.
++i;
continue;
@@ -2057,7 +2232,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
i += block_faults;
// Don't issue replays in cancel mode
if (replay_per_va_block && !batch_context->has_fatal_faults) {
if (replay_per_va_block && !batch_context->fatal_va_space) {
status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
if (status != NV_OK)
goto fail;
@@ -2069,8 +2244,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
}
}
// Only clobber status if invalidate_status != NV_OK, since status may also
// contain NV_WARN_MORE_PROCESSING_REQUIRED.
if (va_space != NULL) {
NV_STATUS invalidate_status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
if (invalidate_status != NV_OK)
@@ -2278,64 +2451,6 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
return false;
}
// Cancel just the faults flagged as fatal in the given fault service batch
// context.
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
NV_STATUS status = NV_OK;
NV_STATUS fault_status;
uvm_va_space_t *va_space = NULL;
NvU32 i;
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
UVM_ASSERT(current_entry->va_space);
if (current_entry->va_space != va_space) {
// Fault on a different va_space, drop the lock of the old one...
if (va_space != NULL)
uvm_va_space_up_read(va_space);
va_space = current_entry->va_space;
// ... and take the lock of the new one
uvm_va_space_down_read(va_space);
// We don't need to check whether a buffer flush is required
// (due to VA range destruction). Once a fault is flagged as fatal
// we need to cancel it, even if its VA range no longer exists.
}
// See the comment for the same check in cancel_faults_all
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id))
continue;
if (current_entry->is_fatal) {
status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
if (status != NV_OK)
break;
}
}
if (va_space != NULL)
uvm_va_space_up_read(va_space);
// See the comment on flushing in cancel_faults_all
fault_status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
// We report the first encountered error.
if (status == NV_OK)
status = fault_status;
return status;
}
// Cancel all faults in the given fault service batch context, even those not
// marked as fatal.
static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
@@ -2344,56 +2459,51 @@ static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
{
NV_STATUS status = NV_OK;
NV_STATUS fault_status;
uvm_va_space_t *va_space = NULL;
NvU32 i;
NvU32 i = 0;
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
while (i < batch_context->num_coalesced_faults && status == NV_OK) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
uvm_fault_cancel_va_mode_t cancel_va_mode;
uvm_va_space_t *va_space = current_entry->va_space;
bool skip_va_space;
UVM_ASSERT(current_entry->va_space);
UVM_ASSERT(va_space);
if (current_entry->va_space != va_space) {
// Fault on a different va_space, drop the lock of the old one...
if (va_space != NULL)
uvm_va_space_up_read(va_space);
uvm_va_space_down_read(va_space);
va_space = current_entry->va_space;
// If there is no GPU VA space for the GPU, ignore all faults in
// that VA space. This can happen if the GPU VA space has been
// destroyed since we unlocked the VA space in service_fault_batch.
// Ignoring the fault avoids targetting a PDB that might have been
// reused by another process.
skip_va_space = !uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
// ... and take the lock of the new one
uvm_va_space_down_read(va_space);
for (;
i < batch_context->num_coalesced_faults && current_entry->va_space == va_space;
current_entry = batch_context->ordered_fault_cache[++i]) {
uvm_fault_cancel_va_mode_t cancel_va_mode;
if (skip_va_space)
continue;
if (current_entry->is_fatal) {
UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
cancel_va_mode = current_entry->replayable.cancel_va_mode;
}
else {
current_entry->fatal_reason = reason;
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
if (status != NV_OK)
break;
}
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
// If there is no GPU VA space for the GPU, ignore the fault.
// This can happen if the GPU VA did not exist in
// service_fault_batch(), or it was destroyed since then.
// This is to avoid targetting a PDB that might have been reused
// by another process.
continue;
}
// If the fault was already marked fatal, use its reason and cancel
// mode. Otherwise use the provided reason.
if (current_entry->is_fatal) {
UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
cancel_va_mode = current_entry->replayable.cancel_va_mode;
}
else {
current_entry->fatal_reason = reason;
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
if (status != NV_OK)
break;
}
if (va_space != NULL)
uvm_va_space_up_read(va_space);
}
// Because each cancel itself triggers a replay, there may be a large number
// of new duplicated faults in the buffer after cancelling all the known
@@ -2537,7 +2647,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
batch_context->num_invalid_prefetch_faults = 0;
batch_context->num_replays = 0;
batch_context->has_fatal_faults = false;
batch_context->fatal_va_space = NULL;
batch_context->has_throttled_faults = false;
// 5) Fetch all faults from buffer
@@ -2584,9 +2694,6 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
// 8) Service all non-fatal faults and mark all non-serviceable faults
// as fatal
status = service_fault_batch(gpu, FAULT_SERVICE_MODE_CANCEL, batch_context);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
continue;
UVM_ASSERT(batch_context->num_replays == 0);
if (status == NV_ERR_NO_MEMORY)
continue;
@@ -2594,7 +2701,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
break;
// No more fatal faults left, we are done
if (!batch_context->has_fatal_faults)
if (!batch_context->fatal_va_space)
break;
// 9) Search for uTLBs that contain fatal faults and meet the
@@ -2616,9 +2723,9 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
UVM_ASSERT(batch_context->has_fatal_faults);
UVM_ASSERT(batch_context->fatal_va_space);
if (gpu->parent->fault_cancel_va_supported)
return cancel_faults_precise_va(gpu, batch_context);
return service_fault_batch_for_cancel(gpu, batch_context);
return cancel_faults_precise_tlb(gpu, batch_context);
}
@@ -2674,7 +2781,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
batch_context->num_invalid_prefetch_faults = 0;
batch_context->num_duplicate_faults = 0;
batch_context->num_replays = 0;
batch_context->has_fatal_faults = false;
batch_context->fatal_va_space = NULL;
batch_context->has_throttled_faults = false;
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
@@ -2702,9 +2809,6 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
// was flushed
num_replays += batch_context->num_replays;
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
continue;
enable_disable_prefetch_faults(gpu->parent, batch_context);
if (status != NV_OK) {
@@ -2718,10 +2822,17 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
break;
}
if (batch_context->has_fatal_faults) {
if (batch_context->fatal_va_space) {
status = uvm_tracker_wait(&batch_context->tracker);
if (status == NV_OK)
if (status == NV_OK) {
status = cancel_faults_precise(gpu, batch_context);
if (status == NV_OK) {
// Cancel handling should've issued at least one replay
UVM_ASSERT(batch_context->num_replays > 0);
++num_batches;
continue;
}
}
break;
}