520.61.05

This commit is contained in:
Andy Ritger
2022-10-10 14:59:24 -07:00
parent fe0728787f
commit 90eb10774f
758 changed files with 88383 additions and 26493 deletions

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -41,10 +41,6 @@ void uvm_hal_maxwell_ce_init(uvm_push_t *push);
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push);
// Host method validation
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
@@ -118,12 +114,10 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
// Issue a TLB invalidate applying to the specified VA range in a PDB.
//
@@ -183,15 +177,13 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
@@ -205,11 +197,9 @@ void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
// By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -217,18 +207,10 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
// Release a semaphore including a timestamp at the specific GPU VA.
//
@@ -238,31 +220,28 @@ void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);
typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
@@ -277,16 +256,12 @@ NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);
typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
@@ -354,11 +329,9 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
// Increments the semaphore by 1, or resets to 0 if the incremented value would
// exceed the payload.
@@ -369,9 +342,7 @@ void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value,
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
// Initialize GPU architecture dependent properties
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
@@ -380,12 +351,8 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
@@ -396,9 +363,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
@@ -412,18 +377,14 @@ uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mm
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
// Replayable faults
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
@@ -477,9 +438,7 @@ void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
@@ -529,12 +488,10 @@ void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
uvm_user_channel_t *user_channel,
@@ -619,39 +576,6 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
struct uvm_host_hal_struct
{
uvm_hal_init_t init;
@@ -666,6 +590,8 @@ struct uvm_host_hal_struct
uvm_hal_semaphore_acquire_t semaphore_acquire;
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
uvm_hal_host_write_gpu_put_t write_gpu_put;
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
@@ -742,17 +668,6 @@ struct uvm_access_counter_buffer_hal_struct
uvm_hal_access_counter_buffer_entry_size_t entry_size;
};
typedef struct
{
// id is either a hardware class or GPU architecture
@@ -775,10 +690,6 @@ typedef struct
// access_counter_buffer_ops: id is an architecture
uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
} u;
} uvm_hal_class_ops_t;