520.61.05

2026-02-28 19:03:58 +00:00 · 2022-10-10 14:59:24 -07:00
parent fe0728787f
commit 90eb10774f
758 changed files with 88383 additions and 26493 deletions
--- a/kernel-open/nvidia-uvm/uvm_hal.h
+++ b/kernel-open/nvidia-uvm/uvm_hal.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2021 NVIDIA Corporation
+    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -41,10 +41,6 @@ void uvm_hal_maxwell_ce_init(uvm_push_t *push);
 void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
 void uvm_hal_pascal_host_init(uvm_push_t *push);

-
-
-
-
 // Host method validation
 typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
 bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
@@ -118,12 +114,10 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
-
-
-
-
-
-
+void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
+                                            uvm_gpu_phys_address_t pdb,
+                                            NvU32 depth,
+                                            uvm_membar_t membar);

 // Issue a TLB invalidate applying to the specified VA range in a PDB.
 //
@@ -183,15 +177,13 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);
-
-
-
-
-
-
-
-
-
+void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
+                                           uvm_gpu_phys_address_t pdb,
+                                           NvU32 depth,
+                                           NvU64 base,
+                                           NvU64 size,
+                                           NvU32 page_size,
+                                           uvm_membar_t membar);

 typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
                                                   uvm_gpu_phys_address_t pdb,
@@ -205,11 +197,9 @@ void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
 void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
-
-
-
-
-
+void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
+                                             uvm_gpu_phys_address_t pdb,
+                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);

 // By default all semaphore release operations include a membar sys before the
 // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -217,18 +207,10 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
 typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
-
-
-
 void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
-
-
-
-
-
-
-
+void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
+void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

 // Release a semaphore including a timestamp at the specific GPU VA.
 //
@@ -238,31 +220,28 @@ void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
 typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
-
-
-
+void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

 void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
-
-
-
-
-
-
-
-
+void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

 typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
-
-
-
+void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

 typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
 void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
 void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
+void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
+
+typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
+void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
+
+typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
+void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
+void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);

 typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
 void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
@@ -277,16 +256,12 @@ NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
 typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
 void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
 void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
-
-
-
+void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);

 typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
 void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
 void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
-
-
-
+void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);

 typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
 NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
@@ -354,11 +329,9 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
 void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
 void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

-
-
-
-
-
+void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
+void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
+void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);

 // Increments the semaphore by 1, or resets to 0 if the incremented value would
 // exceed the payload.
@@ -369,9 +342,7 @@ void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value,
 typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
-
-
-
+void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

 // Initialize GPU architecture dependent properties
 typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
@@ -380,12 +351,8 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
-
-
-
-
-
-
+void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
+void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);

 // Retrieve the page-tree HAL for a given big page size
 typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
@@ -396,9 +363,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
-
-
-
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
@@ -412,18 +377,14 @@ uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mm
 uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
 uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
 uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
-
-
-
+uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);

 typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
 NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
 NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
-
-
-
+NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);

 // Replayable faults
 typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
@@ -477,9 +438,7 @@ void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
                                            uvm_fault_buffer_entry_t *buffer_entry);
 void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-
-
-
+NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

 bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
@@ -529,12 +488,10 @@ void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
                                    const uvm_fault_buffer_entry_t *fault_entry,
                                    uvm_fault_cancel_va_mode_t cancel_va_mode);

-
-
-
-
-
-
+void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
+                                     uvm_gpu_phys_address_t pdb,
+                                     const uvm_fault_buffer_entry_t *fault_entry,
+                                     uvm_fault_cancel_va_mode_t cancel_va_mode);

 typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
                                                            uvm_user_channel_t *user_channel,
@@ -619,39 +576,6 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
 void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 struct uvm_host_hal_struct
 {
    uvm_hal_init_t init;
@@ -666,6 +590,8 @@ struct uvm_host_hal_struct
    uvm_hal_semaphore_acquire_t semaphore_acquire;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
+    uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
+    uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
    uvm_hal_host_write_gpu_put_t write_gpu_put;
    uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
    uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
@@ -742,17 +668,6 @@ struct uvm_access_counter_buffer_hal_struct
    uvm_hal_access_counter_buffer_entry_size_t entry_size;
 };

-
-
-
-
-
-
-
-
-
-
-
 typedef struct
 {
    // id is either a hardware class or GPU architecture
@@ -775,10 +690,6 @@ typedef struct
        // access_counter_buffer_ops: id is an architecture
        uvm_access_counter_buffer_hal_t access_counter_buffer_ops;

-
-
-
-
    } u;
 } uvm_hal_class_ops_t;