mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-01-31 05:29:47 +00:00)

580.94.10
@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
 ccflags-y += -I$(src)
 ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
-ccflags-y += -DNV_VERSION_STRING=\"580.94.06\"
+ccflags-y += -DNV_VERSION_STRING=\"580.94.10\"
 
 # Include and link Tegra out-of-tree modules.
 ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
@@ -165,6 +165,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC, 0x00000087, "NVLink fabri
 NV_STATUS_CODE(NV_ERR_BUFFER_FULL,              0x00000088, "Buffer is full")
 NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY,             0x00000089, "Buffer is empty")
 NV_STATUS_CODE(NV_ERR_MC_FLA_OFFSET_TABLE_FULL, 0x0000008A, "Multicast FLA offset table has no available slots")
+NV_STATUS_CODE(NV_ERR_DMA_XFER_FAILED,          0x0000008B, "DMA transfer failed")
 
 // Warnings:
 NV_STATUS_CODE(NV_WARN_HOT_SWITCH,              0x00010001, "WARNING Hot switch")
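A hedged illustration of how the new status code might be consumed; this is not part of the commit, and do_dma_transfer is a hypothetical helper (only NV_ERR_DMA_XFER_FAILED, UVM_ERR_PRINT, and nvstatusToString are real, the latter two appearing later in this diff):

    NV_STATUS status = do_dma_transfer();  /* hypothetical helper */
    if (status == NV_ERR_DMA_XFER_FAILED)
        UVM_ERR_PRINT("DMA transfer failed: %s\n", nvstatusToString(status));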
@@ -62,6 +62,11 @@ struct os_work_queue;
 /* Each OS defines its own version of this opaque type */
 typedef struct os_wait_queue os_wait_queue;
 
+/* Flags needed by os_get_current_process_flags */
+#define OS_CURRENT_PROCESS_FLAG_NONE          0x0
+#define OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD 0x1
+#define OS_CURRENT_PROCESS_FLAG_EXITING       0x2
+
 /*
  * ---------------------------------------------------------------------------
  *
@@ -194,6 +199,7 @@ NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void **
 NV_STATUS NV_API_CALL os_open_and_read_file       (const char *, NvU8 *, NvU64);
 NvBool    NV_API_CALL os_is_nvswitch_present      (void);
 NV_STATUS NV_API_CALL os_get_random_bytes         (NvU8 *, NvU16);
+NvU32     NV_API_CALL os_get_current_process_flags(void);
 NV_STATUS NV_API_CALL os_alloc_wait_queue         (os_wait_queue **);
 void      NV_API_CALL os_free_wait_queue          (os_wait_queue *);
 void      NV_API_CALL os_wait_uninterruptible     (os_wait_queue *);
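A minimal caller-side sketch, not from this commit: with the new query, OS-independent RM code can branch on process state without touching Linux task flags directly. The branch body here is hypothetical:

    NvU32 flags = os_get_current_process_flags();

    if (flags & OS_CURRENT_PROCESS_FLAG_EXITING)
    {
        /* Hypothetical: the current process is being torn down, so a
         * caller might skip optional work here. */
    }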
@@ -461,3 +461,29 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
     if (params->membar == UvmInvalidateTlbMemBarLocal)
         uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
 }
+
+void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture)
+{
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    NvU32 aperture_value;
+
+    if (aperture == UVM_APERTURE_SYS) {
+        aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_SYSMEM_INVALIDATE);
+    }
+    else if (uvm_aperture_is_peer(aperture)) {
+        aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_PEERMEM_INVALIDATE);
+    }
+    else {
+        UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
+        return;
+    }
+
+    uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
+
+    NV_PUSH_4U(C56F, MEM_OP_A, 0,
+                     MEM_OP_B, 0,
+                     MEM_OP_C, 0,
+                     MEM_OP_D, aperture_value);
+
+    uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
+}
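For orientation, a sketch of how this function is reached (assembled from the host_table and uvm_mmu.c hunks later in this diff): generic UVM code never calls the architecture-specific entry point directly, it dispatches through the per-GPU HAL table:

    /* Dispatch through the HAL table installed for the GPU's class; on
     * Ampere this resolves to uvm_hal_ampere_host_l2_invalidate above. */
    gpu->parent->host_hal->l2_invalidate(push, UVM_APERTURE_SYS);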
@@ -347,10 +347,23 @@ uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_g
     return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
 }
 
-// Host-specific L2 cache invalidate for non-coherent sysmem
-void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
+void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture)
 {
     uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    NvU32 aperture_value;
+
+    if (!gpu->parent->is_integrated_gpu) {
+        return uvm_hal_ampere_host_l2_invalidate(push, aperture);
+    }
+
+    switch (aperture) {
+        case UVM_APERTURE_SYS:
+            aperture_value = HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE);
+            break;
+        default:
+            UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
+            return;
+    }
 
     // First sysmembar
     uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
@@ -363,7 +376,7 @@ void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
     NV_PUSH_4U(C96F, MEM_OP_A, 0,
                      MEM_OP_B, 0,
                      MEM_OP_C, 0,
-                     MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE));
+                     MEM_OP_D, aperture_value);
     // Final sysmembar
     uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
 }
@@ -221,7 +221,7 @@ static uvm_hal_class_ops_t host_table[] =
             .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
             .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
             .access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
-            .l2_invalidate_noncoh_sysmem = uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported,
+            .l2_invalidate = uvm_hal_host_l2_invalidate_unsupported,
             .get_time = uvm_hal_maxwell_get_time,
         }
     },
@@ -287,6 +287,7 @@ static uvm_hal_class_ops_t host_table[] =
             .tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
             .tlb_invalidate_va = uvm_hal_ampere_host_tlb_invalidate_va,
             .tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
+            .l2_invalidate = uvm_hal_ampere_host_l2_invalidate,
         }
     },
     {
@@ -315,8 +316,8 @@ static uvm_hal_class_ops_t host_table[] =
             .tlb_invalidate_phys = uvm_hal_blackwell_host_tlb_invalidate_phys,
             .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
             .tlb_flush_prefetch = uvm_hal_blackwell_host_tlb_flush_prefetch,
-            .l2_invalidate_noncoh_sysmem = uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem,
             .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
+            .l2_invalidate = uvm_hal_blackwell_host_l2_invalidate,
         }
     },
     {
@@ -1162,10 +1163,11 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
 {
 }
 
-void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push)
+void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture)
 {
     uvm_gpu_t *gpu = uvm_push_get_gpu(push);
     UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n",
                   uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
-    UVM_ASSERT_MSG(false, "host l2_invalidate_noncoh_sysmem called on unsupported GPU\n");
+    UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s",
+                   uvm_parent_gpu_name(gpu->parent));
 }
@@ -248,11 +248,12 @@ typedef void (*uvm_hal_host_tlb_flush_prefetch_t)(uvm_push_t *push);
 void uvm_hal_maxwell_host_tlb_flush_prefetch_unsupported(uvm_push_t *push);
 void uvm_hal_blackwell_host_tlb_flush_prefetch(uvm_push_t *push);
 
-// L2 cache invalidate for non-coherent sysmem for systems with write back cache.
-// These are iGPUs as of now.
-typedef void (*uvm_hal_host_l2_invalidate_noncoh_sysmem_t)(uvm_push_t *push);
-void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push);
-void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push);
+// Performs L2 cache invalidation for peer or system memory.
+typedef void (*uvm_hal_host_l2_invalidate_t)(uvm_push_t *push, uvm_aperture_t aperture);
+void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture);
+
+void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture);
+void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture);
 
 // By default all semaphore release operations include a membar sys before the
 // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -822,7 +823,7 @@ struct uvm_host_hal_struct
     uvm_hal_host_tlb_invalidate_phys_t tlb_invalidate_phys;
     uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
     uvm_hal_host_tlb_flush_prefetch_t tlb_flush_prefetch;
-    uvm_hal_host_l2_invalidate_noncoh_sysmem_t l2_invalidate_noncoh_sysmem;
+    uvm_hal_host_l2_invalidate_t l2_invalidate;
     uvm_hal_fault_buffer_replay_t replay_faults;
     uvm_hal_fault_cancel_global_t cancel_faults_global;
     uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
@@ -1276,11 +1276,20 @@ void uvm_ext_gpu_map_destroy(uvm_va_range_external_t *external_range,
 
     range_tree = uvm_ext_gpu_range_tree(external_range, mapped_gpu);
 
-    // Perform L2 cache invalidation for noncoherent sysmem mappings.
-    // This is done only on systems with write-back cache which is iGPUs as of now.
+    // Perform L2 cache invalidation for cached peer and sysmem mappings.
     if (ext_gpu_map->need_l2_invalidate_at_unmap) {
-        UVM_ASSERT(ext_gpu_map->gpu->parent->is_integrated_gpu);
-        status = uvm_mmu_l2_invalidate_noncoh_sysmem(mapped_gpu);
+        uvm_aperture_t aperture;
+
+        // Peer cache invalidation is not targeted to a specific peer, so we
+        // just use UVM_APERTURE_PEER(0).
+        if (ext_gpu_map->is_egm)
+            aperture = UVM_APERTURE_PEER(0);
+        else if (ext_gpu_map->is_sysmem)
+            aperture = UVM_APERTURE_SYS;
+        else
+            aperture = UVM_APERTURE_PEER(0);
+
+        status = uvm_mmu_l2_invalidate(mapped_gpu, aperture);
         UVM_ASSERT(status == NV_OK);
     }
 
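The aperture selection above collapses to a small mapping; a hedged restatement as a standalone helper, introduced here purely for illustration (the helper name is hypothetical). EGM mappings are system memory reached through a peer aperture, which is presumably why both the EGM case and the fall-through peer case use UVM_APERTURE_PEER(0):

    static uvm_aperture_t pick_l2_invalidate_aperture(const uvm_ext_gpu_map_t *map)
    {
        /* Peer invalidation is untargeted, so any peer index works. */
        if (map->is_sysmem && !map->is_egm)
            return UVM_APERTURE_SYS;
        return UVM_APERTURE_PEER(0);
    }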
@@ -2974,25 +2974,21 @@ NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu)
     return uvm_push_end_and_wait(&push);
 }
 
-NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu)
+NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture)
 {
     uvm_push_t push;
     NV_STATUS status;
 
-    // L2 cache invalidation is only done for systems with write-back
-    // cache which is iGPUs as of now.
-    UVM_ASSERT(gpu->parent->is_integrated_gpu);
-
     status = uvm_push_begin(gpu->channel_manager,
                             UVM_CHANNEL_TYPE_MEMOPS,
                             &push,
-                            "L2 cache invalidate for sysmem");
+                            "L2 cache invalidate");
     if (status != NV_OK) {
         UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status));
         return status;
     }
 
-    gpu->parent->host_hal->l2_invalidate_noncoh_sysmem(&push);
+    gpu->parent->host_hal->l2_invalidate(&push, aperture);
 
     status = uvm_push_end_and_wait(&push);
     if (status != NV_OK)
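Usage sketch for the renamed entry point (a hypothetical caller; the unmap path in uvm_ext_gpu_map_destroy above is the real one in this commit):

    NV_STATUS status = uvm_mmu_l2_invalidate(gpu, UVM_APERTURE_SYS);
    if (status != NV_OK)
        return status;  /* push creation or completion failed */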
@@ -722,9 +722,8 @@ uvm_gpu_address_t uvm_mmu_gpu_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phy
 // dma addresses, IOVAs, and GPAs). See uvm_dma_map_invalidation_t.
 NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu);
 
-// Invalidate L2 cache when noncoherent sysmem mappings are unmapped.
-// This is done for systems with write-back cache i.e. iGPUs as of now.
-NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu);
+// Invalidate L2 cache for peer or system memory.
+NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture);
 
 NV_STATUS uvm_test_invalidate_tlb(UVM_TEST_INVALIDATE_TLB_PARAMS *params, struct file *filp);
@@ -204,8 +204,12 @@ typedef struct
     uvm_deferred_free_object_t deferred_free;
 
     // Flag indicating whether L2 cache invalidation is needed at unmap time.
-    // This is set by RM during mapping and used during unmap to determine
-    // if L2 cache invalidation should be performed for non coherent sysmem.
+    // This is set by RM during mapping and used during unmap to determine if L2
+    // cache invalidation should be performed. For GPU cached system memory
+    // allocations on systems with a write-back cache this is required for
+    // correctness. For GPU cached peer and system memory on systems with a
+    // write-through cache the invalidation could be done by RM at map time;
+    // however, this introduces overhead during performance-sensitive sections.
     bool need_l2_invalidate_at_unmap;
 } uvm_ext_gpu_map_t;
 
@@ -2061,6 +2061,22 @@ NV_STATUS NV_API_CALL os_get_random_bytes
     return NV_OK;
 }
 
+NvU32 NV_API_CALL os_get_current_process_flags
+(
+    void
+)
+{
+    NvU32 flags = OS_CURRENT_PROCESS_FLAG_NONE;
+
+    if (current->flags & PF_EXITING)
+        flags |= OS_CURRENT_PROCESS_FLAG_EXITING;
+
+    if (current->flags & PF_KTHREAD)
+        flags |= OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD;
+
+    return flags;
+}
+
 NV_STATUS NV_API_CALL os_alloc_wait_queue
 (
     os_wait_queue **wq
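A hedged porting note: PF_EXITING and PF_KTHREAD are Linux task flags from <linux/sched.h>, while the OS-interface contract is only the bit definitions added to os-interface.h above. A non-Linux port with no notion of kernel threads or exit state could plausibly be as simple as this sketch (illustrative, not from this commit):

    NvU32 NV_API_CALL os_get_current_process_flags(void)
    {
        /* Hypothetical port: no kernel-thread or exit state exposed. */
        return OS_CURRENT_PROCESS_FLAG_NONE;
    }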