560.28.03

2026-02-05 15:49:58 +00:00 · 2024-07-19 15:45:15 -07:00
parent 5fdf5032fb
commit 448d5cc656
859 changed files with 165424 additions and 91129 deletions
--- a/kernel-open/nvidia-uvm/uvm_hal.h
+++ b/kernel-open/nvidia-uvm/uvm_hal.h
@@ -124,6 +124,10 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
+void uvm_hal_blackwell_host_tlb_invalidate_all(uvm_push_t *push,
+                                               uvm_gpu_phys_address_t pdb,
+                                               NvU32 depth,
+                                               uvm_membar_t membar);

 // Issue a TLB invalidate applying to the specified VA range in a PDB.
 //
@@ -197,6 +201,13 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
                                           NvU64 size,
                                           NvU64 page_size,
                                           uvm_membar_t membar);
+void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
+                                              uvm_gpu_phys_address_t pdb,
+                                              NvU32 depth,
+                                              NvU64 base,
+                                              NvU64 size,
+                                              NvU64 page_size,
+                                              uvm_membar_t membar);

 typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
                                                   uvm_gpu_phys_address_t pdb,
@@ -216,6 +227,9 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
 void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
+void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
+                                                uvm_gpu_phys_address_t pdb,
+                                                UVM_TEST_INVALIDATE_TLB_PARAMS *params);

 // By default all semaphore release operations include a membar sys before the
 // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -457,6 +471,7 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
+void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);

 // Retrieve the page-tree HAL for a given big page size
 typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU64 big_page_size);
@@ -468,27 +483,19 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

-// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
-// faults serviced by UVM are handled. On Pascal the only such engine is
-// GRAPHICS, so no translation is provided.
-typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
-uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
-uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
-uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
-uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
-uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
-
 typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
 NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
 NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
+NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id);

 // Replayable faults
 typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
@@ -498,6 +505,9 @@ typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
 typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
 typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
+typedef uvm_mmu_engine_type_t (*uvm_hal_fault_buffer_get_mmu_engine_type_t)(NvU16 mmu_engine_id,
+                                                                            uvm_fault_client_type_t client_type,
+                                                                            NvU16 client_id);

 // Parse the replayable entry at the given buffer index. This also clears the
 // valid bit of the entry in the buffer.
@@ -535,6 +545,9 @@ NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent
 NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
+uvm_mmu_engine_type_t uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported(NvU16 mmu_engine_id,
+                                                                                   uvm_fault_client_type_t client_type,
+                                                                                   NvU16 client_id);
 uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);

 void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
@@ -550,12 +563,31 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
 NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
+uvm_mmu_engine_type_t uvm_hal_volta_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
+                                                                     uvm_fault_client_type_t client_type,
+                                                                     NvU16 client_id);

 uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);

 void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+uvm_mmu_engine_type_t uvm_hal_turing_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
+                                                                      uvm_fault_client_type_t client_type,
+                                                                      NvU16 client_id);
+
+uvm_mmu_engine_type_t uvm_hal_ampere_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
+                                                                      uvm_fault_client_type_t client_type,
+                                                                      NvU16 client_id);
+
 NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
+uvm_mmu_engine_type_t uvm_hal_hopper_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
+                                                                      uvm_fault_client_type_t client_type,
+                                                                      NvU16 client_id);
+
+uvm_mmu_engine_type_t uvm_hal_blackwell_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
+                                                                         uvm_fault_client_type_t client_type,
+                                                                         NvU16 client_id);
+uvm_fault_type_t uvm_hal_blackwell_fault_buffer_get_fault_type(const NvU32 *fault_entry);

 bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
@@ -779,7 +811,6 @@ struct uvm_arch_hal_struct
    uvm_hal_lookup_mode_hal_t mmu_mode_hal;
    uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
    uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
-    uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
    uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
 };

@@ -792,6 +823,7 @@ struct uvm_fault_buffer_hal_struct
    uvm_hal_fault_buffer_read_get_t read_get;
    uvm_hal_fault_buffer_write_get_t write_get;
    uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
+    uvm_hal_fault_buffer_get_mmu_engine_type_t get_mmu_engine_type;
    uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
    uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;