Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-02-04 07:10:19 +00:00)

Commit: 550.40.07
@@ -57,14 +57,16 @@

 typedef struct
 {
-    // Number of faults from this uTLB that have been fetched but have not been serviced yet
+    // Number of faults from this uTLB that have been fetched but have not been
+    // serviced yet.
     NvU32 num_pending_faults;

     // Whether the uTLB contains fatal faults
     bool has_fatal_faults;

-    // We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
-    // the uTLB in lockdown mode and no new translations are accepted
+    // We have issued a replay of type START_ACK_ALL while containing fatal
+    // faults. This puts the uTLB in lockdown mode and no new translations are
+    // accepted.
     bool in_lockdown;

     // We have issued a cancel on this uTLB
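To make the bookkeeping above concrete, here is a minimal sketch (not part of the commit) of how a fault-fetch path might update these fields; the uvm_fault_utlb_info_t typedef name and the helper itself are assumptions:

    static void utlb_track_fetched_fault(uvm_fault_utlb_info_t *utlb, bool is_fatal)
    {
        // One more fault fetched from this uTLB but not yet serviced
        utlb->num_pending_faults++;

        // Fatal faults make the uTLB a candidate for cancel/lockdown handling
        if (is_fatal)
            utlb->has_fatal_faults = true;
    }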
@@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
     struct list_head service_context_list;

     // A mask of GPUs that need to be checked for ECC errors before the CPU
-    // fault handler returns, but after the VA space lock has been unlocked to
-    // avoid the RM/UVM VA space lock deadlocks.
+    // fault handler returns, but after the VA space lock has been unlocked
+    // to avoid the RM/UVM VA space lock deadlocks.
     uvm_processor_mask_t gpus_to_check_for_ecc;

     // This is set to throttle page fault thrashing.
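The note above implies a two-phase flow: service faults under the VA space lock, then check ECC only after unlocking. A minimal sketch of a consumer (the helper is hypothetical; uvm_processor_mask_test() is an existing UVM primitive, and uvm_service_block_context_t is the typedef used later in this diff):

    static bool gpu_needs_ecc_check(uvm_service_block_context_t *service_context,
                                    uvm_gpu_t *gpu)
    {
        // Only valid to act on after the VA space lock has been dropped,
        // per the deadlock note above
        return uvm_processor_mask_test(&service_context->gpus_to_check_for_ecc, gpu->id);
    }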
@@ -160,9 +162,9 @@ struct uvm_service_block_context_struct

     struct
     {
-        // Per-processor mask with the pages that will be resident after servicing.
-        // We need one mask per processor because we may coalesce faults that
-        // trigger migrations to different processors.
+        // Per-processor mask with the pages that will be resident after
+        // servicing. We need one mask per processor because we may coalesce
+        // faults that trigger migrations to different processors.
         uvm_page_mask_t new_residency;
     } per_processor_masks[UVM_ID_MAX_PROCESSORS];

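Why one mask per processor: a single servicing pass may coalesce faults whose pages migrate to different destinations, so each destination gets its own new_residency mask. A sketch under that assumption (the helper is hypothetical; uvm_page_mask_set() and uvm_id_value() are existing UVM helpers):

    static void mark_new_residency(uvm_service_block_context_t *service_context,
                                   uvm_processor_id_t new_residency,
                                   uvm_page_index_t page_index)
    {
        uvm_page_mask_t *mask =
            &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency;

        // Record that this page will be resident on its destination processor
        uvm_page_mask_set(mask, page_index);
    }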
@@ -179,23 +181,28 @@ struct uvm_service_block_context_struct

 typedef struct
 {
     // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
-    // VMA. Used for batching ATS faults in a vma.
+    // VMA. Used for batching ATS faults in a vma. This is unused for access
+    // counter service requests.
     uvm_page_mask_t read_fault_mask;

     // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. Used for batching ATS faults in a vma.
+    // SAM VMA. Used for batching ATS faults in a vma. This is unused for access
+    // counter service requests.
     uvm_page_mask_t write_fault_mask;

     // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
-    // of a SAM VMA. Used to return ATS fault status.
+    // of a SAM VMA. Used to return ATS fault status. This is unused for access
+    // counter service requests.
     uvm_page_mask_t faults_serviced_mask;

     // Mask of successfully serviced read faults on pages in write_fault_mask.
+    // This is unused for access counter service requests.
     uvm_page_mask_t reads_serviced_mask;

-    // Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. This is used as input to the prefetcher.
-    uvm_page_mask_t faulted_mask;
+    // Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
+    // VMA. This is used as input for access counter service requests and output
+    // of fault service requests.
+    uvm_page_mask_t accessed_mask;

     // Client type of the service requestor.
     uvm_fault_client_type_t client_type;
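These masks exist so individual ATS faults can be batched per UVM_VA_BLOCK_SIZE aligned region and serviced in one pass. A minimal sketch of filling them (the helper is hypothetical; uvm_ats_fault_context_t is the typedef this struct receives, per the ats_context field added later in this diff):

    static void ats_batch_fault(uvm_ats_fault_context_t *ats_context,
                                NvU64 fault_addr,
                                NvU64 region_base,
                                bool is_write)
    {
        uvm_page_index_t page_index = (fault_addr - region_base) / PAGE_SIZE;

        // All accessed pages land in accessed_mask; the read/write masks
        // drive the actual ATS fault servicing
        uvm_page_mask_set(&ats_context->accessed_mask, page_index);

        if (is_write)
            uvm_page_mask_set(&ats_context->write_fault_mask, page_index);
        else
            uvm_page_mask_set(&ats_context->read_fault_mask, page_index);
    }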
@@ -294,11 +301,8 @@ struct uvm_fault_service_batch_context_struct

 struct uvm_ats_fault_invalidate_struct
 {
-    // Whether the TLB batch contains any information
-    bool write_faults_in_batch;
-
-    // Batch of TLB entries to be invalidated
-    uvm_tlb_batch_t write_faults_tlb_batch;
+    bool tlb_batch_pending;
+    uvm_tlb_batch_t tlb_batch;
 };

 typedef struct
@@ -443,20 +447,9 @@ struct uvm_access_counter_service_batch_context_struct
         NvU32 num_notifications;

         // Boolean used to avoid sorting the fault batch by instance_ptr if we
-        // determine at fetch time that all the access counter notifications in the
-        // batch report the same instance_ptr
+        // determine at fetch time that all the access counter notifications in
+        // the batch report the same instance_ptr
         bool is_single_instance_ptr;
-
-        // Scratch space, used to generate artificial physically addressed notifications.
-        // Virtual address notifications are always aligned to 64k. This means up to 16
-        // different physical locations could have been accessed to trigger one notification.
-        // The sub-granularity mask can correspond to any of them.
-        struct
-        {
-            uvm_processor_id_t resident_processors[16];
-            uvm_gpu_phys_address_t phys_addresses[16];
-            uvm_access_counter_buffer_entry_t phys_entry;
-        } scratch;
     } virt;

     struct
@@ -467,8 +460,8 @@ struct uvm_access_counter_service_batch_context_struct
         NvU32 num_notifications;

         // Boolean used to avoid sorting the fault batch by aperture if we
-        // determine at fetch time that all the access counter notifications in the
-        // batch report the same aperture
+        // determine at fetch time that all the access counter notifications in
+        // the batch report the same aperture
         bool is_single_aperture;
     } phys;

@@ -478,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
     // Structure used to coalesce access counter servicing in a VA block
     uvm_service_block_context_t block_service_context;

+    // Structure used to service access counter migrations in an ATS block.
+    uvm_ats_fault_context_t ats_context;
+
     // Unique id (per-GPU) generated for tools events recording
     NvU32 batch_id;
 };
@@ -610,10 +606,22 @@ typedef enum
     UVM_GPU_PEER_COPY_MODE_COUNT
 } uvm_gpu_peer_copy_mode_t;

+// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
+// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
+// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
+// partitions within the parent. The parent GPU and partition GPU have
+// different "id" and "uuid".
 struct uvm_gpu_struct
 {
     uvm_parent_gpu_t *parent;

+    // The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
+    NvProcessorUuid uuid;
+
+    // Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
+    // UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
+    char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
+
     // Refcount of the gpu, i.e. how many times it has been retained. This is
     // roughly a count of how many times it has been registered with a VA space,
     // except that some paths retain the GPU temporarily without a VA space.
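To make the parent/child split concrete, a sketch (not part of the commit) using the name accessors this diff updates further down; the child uvm_gpu_t now carries per-partition identity, while device-wide identity stays in the parent:

    static void log_gpu_identity(uvm_gpu_t *gpu)
    {
        // Partition-level name (built from the GI uuid when SMC is enabled)
        printk(KERN_INFO "uvm: child GPU %s\n", uvm_gpu_name(gpu));

        // Device-level name shared by all partitions of the PCIe device
        printk(KERN_INFO "uvm: parent GPU %s\n", uvm_parent_gpu_name(gpu->parent));
    }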
@@ -632,13 +640,9 @@ struct uvm_gpu_struct
     // user can create a lot of va spaces and register the gpu with them).
     atomic64_t retained_count;

-    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS); this is a copy
-    // of the parent's id.
+    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
     uvm_gpu_id_t id;

-    // A unique uvm global_gpu id in range [1, UVM_GLOBAL_ID_MAX_PROCESSORS)
-    uvm_global_gpu_id_t global_id;
-
     // Should be UVM_GPU_MAGIC_VALUE. Used for memory checking.
     NvU64 magic;

@@ -664,8 +668,8 @@ struct uvm_gpu_struct
     struct
     {
         // Big page size used by the internal UVM VA space
-        // Notably it may be different than the big page size used by a user's VA
-        // space in general.
+        // Notably it may be different than the big page size used by a user's
+        // VA space in general.
         NvU32 internal_size;
     } big_page;

@@ -691,8 +695,8 @@ struct uvm_gpu_struct
         // lazily-populated array of peer GPUs, indexed by the peer's GPU index
         uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

-        // Leaf spinlock used to synchronize access to the peer_gpus table so that
-        // it can be safely accessed from the access counters bottom half
+        // Leaf spinlock used to synchronize access to the peer_gpus table so
+        // that it can be safely accessed from the access counters bottom half
         uvm_spinlock_t peer_gpus_lock;
     } peer_info;

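The locking pattern the comment describes, sketched (the helper is hypothetical; uvm_spin_lock()/uvm_spin_unlock() and uvm_id_gpu_index() are existing UVM primitives):

    static uvm_gpu_t *peer_gpu_lookup(uvm_gpu_t *gpu, uvm_gpu_id_t peer_id)
    {
        uvm_gpu_t *peer_gpu;

        // Leaf spinlock: safe to take from the access counters bottom half
        uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
        peer_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(peer_id)];
        uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);

        return peer_gpu;
    }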
@@ -852,6 +856,11 @@ struct uvm_gpu_struct
     bool uvm_test_force_upper_pushbuffer_segment;
 };

+// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
+// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
+// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
+// partitions within the parent. The parent GPU and partition GPU have
+// different "id" and "uuid".
 struct uvm_parent_gpu_struct
 {
     // Reference count for how many places are holding on to a parent GPU
@@ -864,11 +873,11 @@ struct uvm_parent_gpu_struct
     // The number of uvm_gpu_ts referencing this uvm_parent_gpu_t.
     NvU32 num_retained_gpus;

-    uvm_gpu_t *gpus[UVM_ID_MAX_SUB_PROCESSORS];
+    uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];

     // Bitmap of valid child entries in the gpus[] table. Used to retrieve a
     // usable child GPU in bottom-halves.
-    DECLARE_BITMAP(valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
+    DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);

     // The gpu's uuid
     NvProcessorUuid uuid;
@@ -880,8 +889,8 @@ struct uvm_parent_gpu_struct
     // hardware classes, etc.).
     UvmGpuInfo rm_info;

-    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
-    uvm_gpu_id_t id;
+    // A unique uvm gpu id in range [1, UVM_PARENT_ID_MAX_PROCESSORS)
+    uvm_parent_gpu_id_t id;

     // Reference to the Linux PCI device
     //
@@ -916,12 +925,13 @@ struct uvm_parent_gpu_struct
     // dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
     // referencing sysmem from the GPU, dma_addressable_start should be
     // subtracted from the physical address. The DMA mapping helpers like
-    // uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
+    // uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
+    // care of that.
     NvU64 dma_addressable_start;
     NvU64 dma_addressable_limit;

-    // Total size (in bytes) of physically mapped (with uvm_gpu_map_cpu_pages)
-    // sysmem pages, used for leak detection.
+    // Total size (in bytes) of physically mapped (with
+    // uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
     atomic64_t mapped_cpu_pages_size;

     // Hardware Abstraction Layer
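The adjustment described above, sketched as a standalone helper (hypothetical; in the real code this happens inside uvm_parent_gpu_map_cpu_pages() and friends):

    static NvU64 dma_addr_to_gpu_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
    {
        UVM_ASSERT(dma_addr >= parent_gpu->dma_addressable_start);
        UVM_ASSERT(dma_addr <= parent_gpu->dma_addressable_limit);

        // The GPU-visible sysmem physical address is offset by the start of
        // the DMA window
        return dma_addr - parent_gpu->dma_addressable_start;
    }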
@@ -940,7 +950,11 @@ struct uvm_parent_gpu_struct
     // Virtualization mode of the GPU.
     UVM_VIRT_MODE virt_mode;

-    // Whether the GPU can trigger faults on prefetch instructions
+    // Pascal+ GPUs can trigger faults on prefetch instructions. If false, this
+    // feature must be disabled at all times in GPUs of the given architecture.
+    // If true, the feature can be toggled at will by SW.
+    //
+    // The field should not be used unless the GPU supports replayable faults.
     bool prefetch_fault_supported;

     // Number of membars required to flush out HSHUB following a TLB invalidate
@@ -955,6 +969,11 @@ struct uvm_parent_gpu_struct

     bool access_counters_supported;

+    // If this is true, physical address based access counter notifications are
+    // potentially generated. If false, only virtual address based notifications
+    // are generated (assuming access_counters_supported is true too).
+    bool access_counters_can_use_physical_addresses;
+
     bool fault_cancel_va_supported;

     // True if the GPU has hardware support for scoped atomics
@@ -981,6 +1000,10 @@ struct uvm_parent_gpu_struct

     bool plc_supported;

+    // If true, page_tree initialization pre-populates no_ats_ranges. It only
+    // affects ATS systems.
+    bool no_ats_range_required;
+
     // Parameters used by the TLB batching API
     struct
     {
@@ -1052,14 +1075,16 @@ struct uvm_parent_gpu_struct
     // Interrupt handling state and locks
     uvm_isr_info_t isr;

-    // Fault buffer info. This is only valid if supports_replayable_faults is set to true
+    // Fault buffer info. This is only valid if supports_replayable_faults is
+    // set to true.
     uvm_fault_buffer_info_t fault_buffer_info;

     // PMM lazy free processing queue.
     // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
     nv_kthread_q_t lazy_free_q;

-    // Access counter buffer info. This is only valid if supports_access_counters is set to true
+    // Access counter buffer info. This is only valid if
+    // supports_access_counters is set to true.
     uvm_access_counter_buffer_info_t access_counter_buffer_info;

     // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
@@ -1109,7 +1134,7 @@ struct uvm_parent_gpu_struct
     uvm_rb_tree_t instance_ptr_table;
     uvm_spinlock_t instance_ptr_table_lock;

-    // This is set to true if the GPU belongs to an SLI group. Else, set to false.
+    // This is set to true if the GPU belongs to an SLI group.
     bool sli_enabled;

     struct
@@ -1136,8 +1161,8 @@ struct uvm_parent_gpu_struct
     // environment, rather than using the peer-id field of the PTE (which can
     // only address 8 gpus), all gpus are assigned a 47-bit physical address
     // space by the fabric manager. Any physical address access to these
-    // physical address spaces are routed through the switch to the corresponding
-    // peer.
+    // physical address spaces are routed through the switch to the
+    // corresponding peer.
     struct
     {
         bool is_nvswitch_connected;
@@ -1175,9 +1200,14 @@ struct uvm_parent_gpu_struct
     } smmu_war;
 };

+static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
+{
+    return parent_gpu->name;
+}
+
 static const char *uvm_gpu_name(uvm_gpu_t *gpu)
 {
-    return gpu->parent->name;
+    return gpu->name;
 }

 static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
@@ -1362,7 +1392,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
 // They must not be the same gpu.
 uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);

-// Get the processor id accessible by the given GPU for the given physical address
+// Get the processor id accessible by the given GPU for the given physical
+// address.
 uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

 // Get the P2P capabilities between the gpus with the given indexes
@@ -1407,10 +1438,11 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
 // mapping covering the passed address, has been previously created.
 static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
 {
-    UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
+    UVM_ASSERT(uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
+               uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
     UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);

-    if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
+    if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent))
         UVM_ASSERT(gpu->static_flat_mapping.ready);

     return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
@@ -1422,12 +1454,12 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
 //
 // The actual GPU mapping only exists if a linear mapping covering the passed
 // address has been previously created.
-static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu, NvU64 pa)
+static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 pa)
 {
-    UVM_ASSERT(uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu));
-    UVM_ASSERT(pa <= (gpu->parent->dma_addressable_limit - gpu->parent->dma_addressable_start));
+    UVM_ASSERT(uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(parent_gpu));
+    UVM_ASSERT(pa <= (parent_gpu->dma_addressable_limit - parent_gpu->dma_addressable_start));

-    return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
+    return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
 }

 // Given a GPU or CPU physical address (not peer), retrieve an address suitable
@@ -1437,11 +1469,12 @@ static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
     UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);

     if (phys_addr.aperture == UVM_APERTURE_VID) {
-        if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
+        if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
+            uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent))
             return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
     }
-    else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
-        return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
+    else if (uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(gpu->parent)) {
+        return uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, phys_addr.address);
     }

     return uvm_gpu_address_from_phys(phys_addr);
@@ -1459,9 +1492,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);

 // Check for ECC errors without calling into RM
 //
-// Calling into RM is problematic in many places, this check is always safe to do.
-// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
-// it's required to call uvm_gpu_check_ecc_error() to be sure.
+// Calling into RM is problematic in many places, this check is always safe to
+// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
+// and it's required to call uvm_gpu_check_ecc_error() to be sure.
 NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);

 // Map size bytes of contiguous sysmem on the GPU for physical access
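The call pattern that comment implies, sketched (hypothetical wrapper): try the cheap no-RM check first and only fall back to the full RM call on an inconclusive answer:

    static NV_STATUS gpu_check_ecc(uvm_gpu_t *gpu)
    {
        NV_STATUS status = uvm_gpu_check_ecc_error_no_rm(gpu);

        // Only a "maybe" answer requires calling into RM to be sure
        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
            status = uvm_gpu_check_ecc_error(gpu);

        return status;
    }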
@@ -1470,19 +1503,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
 //
 // Returns the physical address of the pages that can be used to access them on
 // the GPU.
-NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
+NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);

-// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
-void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
+// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
+void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

-static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
+static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
 {
-    return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
+    return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
 }

-static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
+static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
 {
-    uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
+    uvm_parent_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
 }

 // Allocate and map a page of system DMA memory on the GPU for physical access
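Typical usage of the renamed single-page wrappers, sketched (hypothetical caller; error handling kept minimal):

    static NV_STATUS with_page_mapped_for_gpu(uvm_parent_gpu_t *parent_gpu, struct page *page)
    {
        NvU64 dma_address;
        NV_STATUS status;

        status = uvm_parent_gpu_map_cpu_page(parent_gpu, page, &dma_address);
        if (status != NV_OK)
            return status;

        // ... program GPU accesses to the page using dma_address ...

        uvm_parent_gpu_unmap_cpu_page(parent_gpu, dma_address);

        return NV_OK;
    }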
@@ -1491,13 +1524,13 @@ static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
 // - the address of the page that can be used to access them on
 // the GPU in the dma_address_out parameter.
 // - the address of allocated memory in CPU virtual address space.
-void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
-                             gfp_t gfp_flags,
-                             NvU64 *dma_address_out);
+void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
+                                    gfp_t gfp_flags,
+                                    NvU64 *dma_address_out);

 // Unmap and free size bytes of contiguous sysmem DMA previously allocated
-// with uvm_gpu_map_cpu_pages().
-void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
+// with uvm_parent_gpu_map_cpu_pages().
+void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);

 // Returns whether the given range is within the GPU's addressable VA ranges.
 // It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1518,6 +1551,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
 // The GPU must be initialized before calling this function.
 bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

+bool uvm_platform_uses_canonical_form_address(void);
+
 // Returns addr's canonical form for host systems that use canonical form
 // addresses.
 NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
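How the new declaration pairs with the canonicalization helper below it, sketched (hypothetical wrapper):

    static NvU64 canonicalize_addr(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
    {
        // Sign extension only applies on platforms that use canonical-form
        // addresses
        if (uvm_platform_uses_canonical_form_address())
            return uvm_parent_gpu_canonical_address(parent_gpu, addr);

        return addr;
    }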
@@ -1527,47 +1562,49 @@ static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
     return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
 }

-static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_needs_pushbuffer_segments(uvm_parent_gpu_t *parent_gpu)
 {
-    return gpu->parent->max_host_va > (1ull << 40);
+    return parent_gpu->max_host_va > (1ull << 40);
 }

-static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_supports_eviction(uvm_parent_gpu_t *parent_gpu)
 {
     // Eviction is supported only if the GPU supports replayable faults
-    return gpu->parent->replayable_faults_supported;
+    return parent_gpu->replayable_faults_supported;
 }

-static bool uvm_gpu_is_virt_mode_sriov_heavy(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_is_virt_mode_sriov_heavy(const uvm_parent_gpu_t *parent_gpu)
 {
-    return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
+    return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
 }

-static bool uvm_gpu_is_virt_mode_sriov_standard(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_is_virt_mode_sriov_standard(const uvm_parent_gpu_t *parent_gpu)
 {
-    return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
+    return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
 }

 // Returns true if the virtualization mode is SR-IOV heavy or SR-IOV standard.
-static bool uvm_gpu_is_virt_mode_sriov(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_is_virt_mode_sriov(const uvm_parent_gpu_t *parent_gpu)
 {
-    return uvm_gpu_is_virt_mode_sriov_heavy(gpu) || uvm_gpu_is_virt_mode_sriov_standard(gpu);
+    return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) ||
+           uvm_parent_gpu_is_virt_mode_sriov_standard(parent_gpu);
 }

-static bool uvm_gpu_uses_proxy_channel_pool(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *parent_gpu)
 {
-    return uvm_gpu_is_virt_mode_sriov_heavy(gpu);
+    return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
 }

-uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
+uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);

 // Debug print of GPU properties
 void uvm_gpu_print(uvm_gpu_t *gpu);

-// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
-// half GPU page fault handler uses this to look up the VA space for GPU faults.
-NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
-void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
+// Add the given instance pointer -> user_channel mapping to this GPU. The
+// bottom half GPU page fault handler uses this to look up the VA space for GPU
+// faults.
+NV_STATUS uvm_parent_gpu_add_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
+void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);

 // Looks up an entry added by uvm_gpu_add_user_channel. Return codes:
 // NV_OK            Translation successful
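The caller-side migration pattern for the renames above, sketched (hypothetical compatibility wrapper): where a uvm_gpu_t used to be passed, its parent is now passed explicitly:

    static bool gpu_supports_eviction(uvm_gpu_t *gpu)
    {
        // Before: uvm_gpu_supports_eviction(gpu)
        // After:  the predicate operates on the parent GPU directly
        return uvm_parent_gpu_supports_eviction(gpu->parent);
    }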
@@ -1578,13 +1615,13 @@ void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
 // out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
 // is responsibile for ensuring that the returned va_space can't be destroyed,
 // so these functions should only be called from the bottom half.
-NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
-                                          uvm_fault_buffer_entry_t *fault,
-                                          uvm_va_space_t **out_va_space);
+NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
+                                                 uvm_fault_buffer_entry_t *fault,
+                                                 uvm_va_space_t **out_va_space);

-NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
-                                                   uvm_access_counter_buffer_entry_t *entry,
-                                                   uvm_va_space_t **out_va_space);
+NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
+                                                          uvm_access_counter_buffer_entry_t *entry,
+                                                          uvm_va_space_t **out_va_space);

 typedef enum
 {