570.123.07

This commit is contained in:
russellcnv
2025-03-25 12:40:01 -07:00
parent 5e6ad2b575
commit 4d941c0b6e
146 changed files with 53927 additions and 54744 deletions

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
// Number of the bottom-half invocations for this interrupt on a GPU
// over its lifetime.
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
// Protects the state of interrupts (enabled/disabled) and whether the GPU
// is currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
uvm_intr_handler_t *access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom-half to
// avoid deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
// Number of top-half ISRs called for this GPU over its lifetime.
NvU64 interrupt_count;
} uvm_isr_info_t;
@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__