550.40.07

Bernhard Stoeckner
2024-01-24 17:51:53 +01:00
parent bb2dac1f20
commit 91676d6628
1411 changed files with 261367 additions and 145959 deletions


@@ -57,14 +57,16 @@
typedef struct
{
// Number of faults from this uTLB that have been fetched but have not been serviced yet
// Number of faults from this uTLB that have been fetched but have not been
// serviced yet.
NvU32 num_pending_faults;
// Whether the uTLB contains fatal faults
bool has_fatal_faults;
// We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
// the uTLB in lockdown mode and no new translations are accepted
// We have issued a replay of type START_ACK_ALL while containing fatal
// faults. This puts the uTLB in lockdown mode and no new translations are
// accepted.
bool in_lockdown;
// We have issued a cancel on this uTLB
@@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
struct list_head service_context_list;
// A mask of GPUs that need to be checked for ECC errors before the CPU
// fault handler returns, but after the VA space lock has been unlocked to
// avoid the RM/UVM VA space lock deadlocks.
// fault handler returns, but after the VA space lock has been unlocked
// to avoid the RM/UVM VA space lock deadlocks.
uvm_processor_mask_t gpus_to_check_for_ecc;
// This is set to throttle page fault thrashing.
@@ -160,9 +162,9 @@ struct uvm_service_block_context_struct
struct
{
// Per-processor mask with the pages that will be resident after servicing.
// We need one mask per processor because we may coalesce faults that
// trigger migrations to different processors.
// Per-processor mask with the pages that will be resident after
// servicing. We need one mask per processor because we may coalesce
// faults that trigger migrations to different processors.
uvm_page_mask_t new_residency;
} per_processor_masks[UVM_ID_MAX_PROCESSORS];
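As a rough illustration of why servicing keeps one residency mask per processor: coalesced faults on the same block may each pick a different destination, so the servicing path accumulates pages per destination rather than into a single mask. A minimal sketch, assuming the uvm_id_value() and uvm_page_mask_set() helpers behave as their names suggest and that the destination and page index come from the fault entry being coalesced:

    // Sketch only: record that page_index will be resident on new_residency_id
    // after servicing, in the uvm_service_block_context_t shown above.
    static void track_new_residency(uvm_service_block_context_t *ctx,
                                    uvm_processor_id_t new_residency_id,
                                    uvm_page_index_t page_index)
    {
        uvm_page_mask_t *mask = &ctx->per_processor_masks[uvm_id_value(new_residency_id)].new_residency;

        uvm_page_mask_set(mask, page_index);
    }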
@@ -179,23 +181,28 @@ struct uvm_service_block_context_struct
typedef struct
{
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. Used for batching ATS faults in a vma.
// VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t read_fault_mask;
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. Used for batching ATS faults in a vma.
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t write_fault_mask;
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used to return ATS fault status.
// of a SAM VMA. Used to return ATS fault status. This is unused for access
// counter service requests.
uvm_page_mask_t faults_serviced_mask;
// Mask of successfully serviced read faults on pages in write_fault_mask.
// This is unused for access counter service requests.
uvm_page_mask_t reads_serviced_mask;
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. This is used as input to the prefetcher.
uvm_page_mask_t faulted_mask;
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. This is used as input for access counter service requests and output
// of fault service requests.
uvm_page_mask_t accessed_mask;
// Client type of the service requestor.
uvm_fault_client_type_t client_type;
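A rough sketch of how a fault batch might populate these masks before ATS servicing, assuming the caller has already computed each fault's page index within the UVM_VA_BLOCK_SIZE aligned region; everything except the mask fields themselves is illustrative:

    // Sketch only: classify one ATS fault into the batching masks.
    static void ats_batch_add_fault(uvm_ats_fault_context_t *ats_context,
                                    uvm_page_index_t page_index,
                                    bool is_write)
    {
        if (is_write)
            uvm_page_mask_set(&ats_context->write_fault_mask, page_index);
        else
            uvm_page_mask_set(&ats_context->read_fault_mask, page_index);

        // accessed_mask is the fault-service output that also feeds the
        // prefetcher and access counter servicing.
        uvm_page_mask_set(&ats_context->accessed_mask, page_index);
    }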
@@ -294,11 +301,8 @@ struct uvm_fault_service_batch_context_struct
struct uvm_ats_fault_invalidate_struct
{
// Whether the TLB batch contains any information
bool write_faults_in_batch;
// Batch of TLB entries to be invalidated
uvm_tlb_batch_t write_faults_tlb_batch;
bool tlb_batch_pending;
uvm_tlb_batch_t tlb_batch;
};
typedef struct
@@ -443,20 +447,9 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in the
// batch report the same instance_ptr
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Scratch space, used to generate artificial physically addressed notifications.
// Virtual address notifications are always aligned to 64k. This means up to 16
// different physical locations could have been accessed to trigger one notification.
// The sub-granularity mask can correspond to any of them.
struct
{
uvm_processor_id_t resident_processors[16];
uvm_gpu_phys_address_t phys_addresses[16];
uvm_access_counter_buffer_entry_t phys_entry;
} scratch;
} virt;
struct
@@ -467,8 +460,8 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in the
// batch report the same aperture
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
@@ -478,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
// Structure used to coalesce access counter servicing in a VA block
uvm_service_block_context_t block_service_context;
// Structure used to service access counter migrations in an ATS block.
uvm_ats_fault_context_t ats_context;
// Unique id (per-GPU) generated for tools events recording
NvU32 batch_id;
};
@@ -610,10 +606,22 @@ typedef enum
UVM_GPU_PEER_COPY_MODE_COUNT
} uvm_gpu_peer_copy_mode_t;
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
// partitions within the parent. The parent GPU and partition GPU have
// different "id" and "uuid".
struct uvm_gpu_struct
{
uvm_parent_gpu_t *parent;
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
NvProcessorUuid uuid;
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
// except that some paths retain the GPU temporarily without a VA space.
@@ -632,13 +640,9 @@ struct uvm_gpu_struct
// user can create a lot of va spaces and register the gpu with them).
atomic64_t retained_count;
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS); this is a copy
// of the parent's id.
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS).
uvm_gpu_id_t id;
// A unique uvm global_gpu id in range [1, UVM_GLOBAL_ID_MAX_PROCESSORS)
uvm_global_gpu_id_t global_id;
// Should be UVM_GPU_MAGIC_VALUE. Used for memory checking.
NvU64 magic;
@@ -664,8 +668,8 @@ struct uvm_gpu_struct
struct
{
// Big page size used by the internal UVM VA space
// Notably it may be different than the big page size used by a user's VA
// space in general.
// Notably it may be different than the big page size used by a user's
// VA space in general.
NvU32 internal_size;
} big_page;
@@ -691,8 +695,8 @@ struct uvm_gpu_struct
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so that
// it can be safely accessed from the access counters bottom half
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
} peer_info;
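Since peer_gpus_lock is documented as a leaf spinlock, a bottom-half lookup of a peer GPU would presumably be bracketed as in this sketch (the use of uvm_spin_lock()/uvm_spin_unlock() here is an assumption):

    // Sketch only: safely read an entry of the peer_gpus table from the
    // access counters bottom half.
    static uvm_gpu_t *peer_gpu_lookup(uvm_gpu_t *gpu, NvU32 peer_index)
    {
        uvm_gpu_t *peer;

        uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
        peer = gpu->peer_info.peer_gpus[peer_index];
        uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);

        return peer;
    }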
@@ -852,6 +856,11 @@ struct uvm_gpu_struct
bool uvm_test_force_upper_pushbuffer_segment;
};
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
// partitions within the parent. The parent GPU and partition GPU have
// different "id" and "uuid".
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
@@ -864,11 +873,11 @@ struct uvm_parent_gpu_struct
// The number of uvm_gpu_ts referencing this uvm_parent_gpu_t.
NvU32 num_retained_gpus;
uvm_gpu_t *gpus[UVM_ID_MAX_SUB_PROCESSORS];
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
// The gpu's uuid
NvProcessorUuid uuid;
@@ -880,8 +889,8 @@ struct uvm_parent_gpu_struct
// hardware classes, etc.).
UvmGpuInfo rm_info;
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
uvm_gpu_id_t id;
// A unique uvm gpu id in range [1, UVM_PARENT_ID_MAX_PROCESSORS)
uvm_parent_gpu_id_t id;
// Reference to the Linux PCI device
//
@@ -916,12 +925,13 @@ struct uvm_parent_gpu_struct
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the physical address. The DMA mapping helpers like
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
// care of that.
NvU64 dma_addressable_start;
NvU64 dma_addressable_limit;
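The subtraction described above would conceptually look like the following sketch; the helper name is hypothetical, since the real adjustment happens inside uvm_parent_gpu_map_cpu_pages() and the other DMA mapping helpers:

    // Sketch only: convert a CPU DMA address into the value the GPU should
    // use, assuming it falls inside the addressable window.
    static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
    {
        UVM_ASSERT(dma_addr >= parent_gpu->dma_addressable_start);
        UVM_ASSERT(dma_addr <= parent_gpu->dma_addressable_limit);

        return dma_addr - parent_gpu->dma_addressable_start;
    }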
// Total size (in bytes) of physically mapped (with uvm_gpu_map_cpu_pages)
// sysmem pages, used for leak detection.
// Total size (in bytes) of physically mapped (with
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
atomic64_t mapped_cpu_pages_size;
// Hardware Abstraction Layer
@@ -940,7 +950,11 @@ struct uvm_parent_gpu_struct
// Virtualization mode of the GPU.
UVM_VIRT_MODE virt_mode;
// Whether the GPU can trigger faults on prefetch instructions
// Pascal+ GPUs can trigger faults on prefetch instructions. If false, this
// feature must be disabled at all times in GPUs of the given architecture.
// If true, the feature can be toggled at will by SW.
//
// The field should not be used unless the GPU supports replayable faults.
bool prefetch_fault_supported;
// Number of membars required to flush out HSHUB following a TLB invalidate
@@ -955,6 +969,11 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@@ -981,6 +1000,10 @@ struct uvm_parent_gpu_struct
bool plc_supported;
// If true, page_tree initialization pre-populates no_ats_ranges. It only
// affects ATS systems.
bool no_ats_range_required;
// Parameters used by the TLB batching API
struct
{
@@ -1052,14 +1075,16 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is set to true
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if supports_access_counters is set to true
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
@@ -1109,7 +1134,7 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;
// This is set to true if the GPU belongs to an SLI group. Else, set to false.
// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;
struct
@@ -1136,8 +1161,8 @@ struct uvm_parent_gpu_struct
// environment, rather than using the peer-id field of the PTE (which can
// only address 8 gpus), all gpus are assigned a 47-bit physical address
// space by the fabric manager. Any physical address access to these
// physical address spaces are routed through the switch to the corresponding
// peer.
// physical address spaces are routed through the switch to the
// corresponding peer.
struct
{
bool is_nvswitch_connected;
@@ -1175,9 +1200,14 @@ struct uvm_parent_gpu_struct
} smmu_war;
};
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->name;
}
static const char *uvm_gpu_name(uvm_gpu_t *gpu)
{
return gpu->parent->name;
return gpu->name;
}
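With the SMC/MIG split, the two accessors diverge: uvm_parent_gpu_name() identifies the physical device while uvm_gpu_name() now identifies the partition. A small usage sketch; the debug print macro and format are assumptions:

    // Sketch only: log both the partition and its parent device.
    static void print_gpu_identity(uvm_gpu_t *gpu)
    {
        UVM_DBG_PRINT("GPU %s (parent %s)\n", uvm_gpu_name(gpu), uvm_parent_gpu_name(gpu->parent));
    }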
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
@@ -1362,7 +1392,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
// They must not be the same gpu.
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
// Get the processor id accessible by the given GPU for the given physical address
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the P2P capabilities between the gpus with the given indexes
@@ -1407,10 +1438,11 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
// mapping covering the passed address, has been previously created.
static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
{
UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
UVM_ASSERT(uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent))
UVM_ASSERT(gpu->static_flat_mapping.ready);
return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
@@ -1422,12 +1454,12 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu
//
// The actual GPU mapping only exists if a linear mapping covering the passed
// address has been previously created.
static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu, NvU64 pa)
static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 pa)
{
UVM_ASSERT(uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu));
UVM_ASSERT(pa <= (gpu->parent->dma_addressable_limit - gpu->parent->dma_addressable_start));
UVM_ASSERT(uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(parent_gpu));
UVM_ASSERT(pa <= (parent_gpu->dma_addressable_limit - parent_gpu->dma_addressable_start));
return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
}
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
@@ -1437,11 +1469,12 @@ static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_addre
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
if (phys_addr.aperture == UVM_APERTURE_VID) {
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent))
return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
}
else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
else if (uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(gpu->parent)) {
return uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, phys_addr.address);
}
return uvm_gpu_address_from_phys(phys_addr);
@@ -1459,9 +1492,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
// Check for ECC errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to do.
// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
// it's required to call uvm_gpu_check_ecc_error() to be sure.
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
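A usage sketch of the two-step check the comment describes: try the RM-free check first and fall back to the full check only when required. Error handling beyond status propagation is omitted:

    // Sketch only: cheap ECC check first, full RM-backed check only if needed.
    static NV_STATUS check_ecc(uvm_gpu_t *gpu)
    {
        NV_STATUS status = uvm_gpu_check_ecc_error_no_rm(gpu);

        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
            status = uvm_gpu_check_ecc_error(gpu);

        return status;
    }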
// Map size bytes of contiguous sysmem on the GPU for physical access
@@ -1470,19 +1503,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
}
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
{
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
uvm_parent_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
}
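A usage sketch for the renamed mapping helpers, built on the single-page wrappers above; the surrounding allocation and use of the DMA address are illustrative:

    // Sketch only: map one CPU page for GPU physical access, then unmap it.
    static NV_STATUS map_and_unmap_example(uvm_parent_gpu_t *parent_gpu, struct page *page)
    {
        NvU64 dma_address;
        NV_STATUS status = uvm_parent_gpu_map_cpu_page(parent_gpu, page, &dma_address);

        if (status != NV_OK)
            return status;

        // ... use dma_address in GPU physical accesses ...

        uvm_parent_gpu_unmap_cpu_page(parent_gpu, dma_address);
        return NV_OK;
    }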
// Allocate and map a page of system DMA memory on the GPU for physical access
@@ -1491,13 +1524,13 @@ static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addre
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter.
// - the address of allocated memory in CPU virtual address space.
void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_gpu_map_cpu_pages().
void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
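And the corresponding sketch for the combined allocate-and-map helpers; the gfp flag choice and the use of the returned pointers are assumptions:

    // Sketch only: allocate a DMA page visible to the GPU, then release it.
    static NV_STATUS dma_page_example(uvm_parent_gpu_t *parent_gpu)
    {
        NvU64 dma_address;
        void *cpu_va = uvm_parent_gpu_dma_alloc_page(parent_gpu, NV_UVM_GFP_FLAGS, &dma_address);

        if (!cpu_va)
            return NV_ERR_NO_MEMORY;

        // ... CPU writes through cpu_va, GPU reads through dma_address ...

        uvm_parent_gpu_dma_free_page(parent_gpu, cpu_va, dma_address);
        return NV_OK;
    }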
// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1518,6 +1551,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
bool uvm_platform_uses_canonical_form_address(void);
// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@@ -1527,47 +1562,49 @@ static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
}
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
static bool uvm_parent_gpu_needs_pushbuffer_segments(uvm_parent_gpu_t *parent_gpu)
{
return gpu->parent->max_host_va > (1ull << 40);
return parent_gpu->max_host_va > (1ull << 40);
}
static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
static bool uvm_parent_gpu_supports_eviction(uvm_parent_gpu_t *parent_gpu)
{
// Eviction is supported only if the GPU supports replayable faults
return gpu->parent->replayable_faults_supported;
return parent_gpu->replayable_faults_supported;
}
static bool uvm_gpu_is_virt_mode_sriov_heavy(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_is_virt_mode_sriov_heavy(const uvm_parent_gpu_t *parent_gpu)
{
return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
}
static bool uvm_gpu_is_virt_mode_sriov_standard(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_is_virt_mode_sriov_standard(const uvm_parent_gpu_t *parent_gpu)
{
return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
}
// Returns true if the virtualization mode is SR-IOV heavy or SR-IOV standard.
static bool uvm_gpu_is_virt_mode_sriov(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_is_virt_mode_sriov(const uvm_parent_gpu_t *parent_gpu)
{
return uvm_gpu_is_virt_mode_sriov_heavy(gpu) || uvm_gpu_is_virt_mode_sriov_standard(gpu);
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) ||
uvm_parent_gpu_is_virt_mode_sriov_standard(parent_gpu);
}
static bool uvm_gpu_uses_proxy_channel_pool(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *parent_gpu)
{
return uvm_gpu_is_virt_mode_sriov_heavy(gpu);
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
}
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
// Debug print of GPU properties
void uvm_gpu_print(uvm_gpu_t *gpu);
// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
// half GPU page fault handler uses this to look up the VA space for GPU faults.
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU
// faults.
NV_STATUS uvm_parent_gpu_add_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
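A sketch of the pairing these renamed entry points imply over a user channel's lifetime; the surrounding channel lifecycle is hypothetical:

    // Sketch only: register the instance pointer -> user_channel mapping when
    // a channel is created, and remove it when the channel goes away.
    static NV_STATUS register_channel_example(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
    {
        NV_STATUS status = uvm_parent_gpu_add_user_channel(gpu->parent, user_channel);

        if (status != NV_OK)
            return status;

        // ... channel is live; the bottom half can now resolve its faults ...

        uvm_parent_gpu_remove_user_channel(gpu->parent, user_channel);
        return NV_OK;
    }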
// Looks up an entry added by uvm_gpu_add_user_channel. Return codes:
// NV_OK Translation successful
@@ -1578,13 +1615,13 @@ void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channe
// out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
// is responsible for ensuring that the returned va_space can't be destroyed,
// so these functions should only be called from the bottom half.
NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
uvm_fault_buffer_entry_t *fault,
uvm_va_space_t **out_va_space);
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault,
uvm_va_space_t **out_va_space);
NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
uvm_access_counter_buffer_entry_t *entry,
uvm_va_space_t **out_va_space);
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t *entry,
uvm_va_space_t **out_va_space);
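A bottom-half lookup sketch based on the documented return contract (out_va_space is only valid when NV_OK is returned); the servicing step at the end is a placeholder:

    // Sketch only: resolve the VA space for a replayable fault in the bottom half.
    static NV_STATUS resolve_fault_va_space(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault)
    {
        uvm_va_space_t *va_space = NULL;
        NV_STATUS status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu, fault, &va_space);

        if (status != NV_OK)
            return status; // e.g. the channel is gone; the fault must be cancelled or dropped.

        // ... service the fault against va_space ...
        return NV_OK;
    }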
typedef enum
{