Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
595.71.05
@@ -1230,3 +1230,4 @@ module_exit(uvm_exit_entry);
 MODULE_LICENSE("Dual MIT/GPL");
 MODULE_INFO(supported, "external");
 MODULE_VERSION(NV_VERSION_STRING);
+MODULE_DESCRIPTION("NVIDIA Unified Virtual Memory kernel module");
@@ -1800,6 +1800,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
         // ATS faults can't be unserviceable, since unserviceable faults require
         // GMMU PTEs.
         UVM_ASSERT(!current_entry->is_fatal);
+        UVM_ASSERT(current_entry->gpu == gpu);

         i++;
@@ -1849,6 +1850,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

     } while (current_entry &&
              (current_entry->fault_address < outer) &&
+             (previous_entry->gpu == current_entry->gpu) &&
              (previous_entry->va_space == current_entry->va_space));

     // Service the last sub-batch.
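For context: the two additions above tighten the invariant that an ATS fault sub-batch never crosses a GPU boundary, just as it already could not cross a VA space boundary. A minimal standalone sketch of that grouping rule, using simplified stand-in types (fault_entry_t and its fields are illustrative, not the driver's structs):

    #include <stddef.h>
    #include <stdint.h>

    /* Simplified stand-in for the driver's fault buffer entry. */
    typedef struct {
        int gpu_id;
        int va_space_id;
        uint64_t fault_address;
    } fault_entry_t;

    /* Length of the sub-batch starting at 'first': consecutive entries that
     * stay below 'outer' and share one GPU and one VA space, mirroring the
     * do/while condition and the new previous_entry->gpu check above. */
    static size_t sub_batch_len(const fault_entry_t *e, size_t count, size_t first, uint64_t outer)
    {
        size_t i = first;

        while (i + 1 < count &&
               e[i + 1].fault_address < outer &&
               e[i].gpu_id == e[i + 1].gpu_id &&
               e[i].va_space_id == e[i + 1].va_space_id)
            i++;

        return i - first + 1;
    }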
@@ -1582,6 +1582,31 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
     return uvm_va_block_region_from_start_end(va_block, start, end);
 }

+uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                        uvm_processor_id_t processor_id,
+                                        uvm_page_index_t page_index)
+{
+    if (!uvm_processor_mask_test(&va_block->mapped, UVM_ID_CPU))
+        return UVM_PROT_NONE;
+
+    if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], page_index)) {
+        if (uvm_processor_mask_test(&va_block->hmm.va_space->has_native_atomics[uvm_id_value(UVM_ID_CPU)],
+                                    processor_id))
+            // If the CPU has write access it also has atomic access, so it's
+            // fine for any GPU with HW support to do atomic accesses.
+            return UVM_PROT_READ_WRITE_ATOMIC;
+        else
+            // Otherwise the GPU needs to fault on atomic access to ensure the
+            // CPU is unmapped.
+            return UVM_PROT_READ_WRITE;
+    }
+
+    if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index))
+        return UVM_PROT_READ_ONLY;
+
+    return UVM_PROT_NONE;
+}
+
 uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                         struct vm_area_struct *vma,
                                         NvU64 addr)
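The new function walks the CPU PTE bits from strongest to weakest: writable with native CPU atomics on the target processor yields read/write/atomic, writable without them yields read/write, readable yields read-only, else none. A compact userspace model of the same decision tree, with hypothetical simplified types (prot_t and the boolean flags stand in for uvm_prot_t and the UVM processor/page masks):

    #include <stdbool.h>

    typedef enum {
        PROT_NONE,
        PROT_READ_ONLY,
        PROT_READ_WRITE,
        PROT_READ_WRITE_ATOMIC
    } prot_t;

    /* Same decision tree as uvm_hmm_compute_mapping_prot(): the CPU's PTE
     * bits bound what a GPU mapping may be granted. */
    static prot_t mapping_prot(bool cpu_mapped, bool cpu_write, bool cpu_read, bool gpu_native_atomics)
    {
        if (!cpu_mapped)
            return PROT_NONE;

        if (cpu_write)
            return gpu_native_atomics ? PROT_READ_WRITE_ATOMIC : PROT_READ_WRITE;

        if (cpu_read)
            return PROT_READ_ONLY;

        return PROT_NONE;
    }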
@@ -258,6 +258,14 @@ typedef struct
                                       const uvm_va_policy_t *policy,
                                       NvU64 address);

+// Return the actual permissions allowed when mapping a page within a
+// va_block on the given processor_id. This may differ from the logical
+// permission if for example the kernel has the CPU pages mapped read-only
+// to do copy-on-write.
+uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                        uvm_processor_id_t processor_id,
+                                        uvm_page_index_t page_index);
+
 // Return the logical protection allowed of a HMM va_block for the page at
 // the given address within the vma which must be valid. This is usually
 // obtained from uvm_hmm_va_block_find_create().
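The logical/actual distinction in the new comment can be reproduced from plain userspace: a private writable file mapping is logically read/write, but the kernel initially installs read-only PTEs so the first store takes a copy-on-write fault. A small illustration (ordinary POSIX code, not UVM; the file path is arbitrary):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/etc/hostname", O_RDONLY);  /* any readable file works */
        if (fd < 0)
            return 1;

        /* Logical protection: read/write. Actual PTEs: read-only at first,
         * because MAP_PRIVATE pages are shared with the page cache until the
         * first store triggers a copy-on-write fault. */
        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
            close(fd);
            return 1;
        }

        p[0] = 'x';  /* COW fault; the kernel now installs a writable PTE */

        munmap(p, 4096);
        close(fd);
        return 0;
    }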
@@ -561,6 +569,13 @@ typedef struct
     return (uvm_va_block_region_t){};
 }

+static uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                               uvm_processor_id_t processor_id,
+                                               uvm_page_index_t page_index)
+{
+    return UVM_PROT_NONE;
+}
+
 static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                                struct vm_area_struct *vma,
                                                NvU64 addr)
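This hunk is the build-time stub: when UVM is compiled without HMM support, the header supplies a static version that returns the most restrictive answer, so call sites need no #ifdefs of their own. The general shape of the pattern, as a self-contained sketch (CONFIG_FEATURE, prot_t, and feature_compute_prot() are illustrative names, not the UVM macros):

    typedef enum { PROT_NONE, PROT_READ_ONLY } prot_t;

    #define CONFIG_FEATURE 0  /* 1 when the real implementation is built in */

    #if CONFIG_FEATURE
    /* Real version lives in a .c file. */
    prot_t feature_compute_prot(int page_index);
    #else
    /* Stub: most restrictive answer, zero cost, no #ifdef at call sites. */
    static inline prot_t feature_compute_prot(int page_index)
    {
        (void)page_index;
        return PROT_NONE;
    }
    #endif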
@@ -10991,6 +10991,13 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
     uvm_processor_mask_t *resident_processors = &va_block_context->scratch_processor_mask;
     NvU32 resident_processors_count;

+    // TODO: Bug 5841902
+    // There are several calls to uvm_va_block_is_hmm() which need to be removed
+    if (uvm_va_block_is_hmm(va_block))
+        return uvm_hmm_compute_mapping_prot(va_block,
+                                            processor_id,
+                                            page_index);
+
     uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
     resident_processors_count = uvm_processor_mask_get_count(resident_processors);
@@ -929,8 +929,12 @@ void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
                                               gpu_va_space->gpu);
         break;
     case UVM_VA_RANGE_TYPE_DEVICE_P2P:
-        unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
-        uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
+        // Device P2P ranges are associated with a specific GPU so destroy
+        // the range entirely if unregistering the associated GPU.
+        if (uvm_va_range_to_device_p2p(va_range)->gpu == gpu_va_space->gpu) {
+            unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
+            uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
+        }
         break;
     default:
         UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
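The fix above changes an unconditional teardown into an ownership-filtered one: only P2P ranges belonging to the GPU whose VA space is being removed are destroyed, while ranges owned by other GPUs survive. A minimal sketch of that filter, using illustrative stand-in types (p2p_range_t and its fields are not the UVM structs):

    #include <stddef.h>

    typedef struct {
        int owner_gpu;
        int live;
    } p2p_range_t;

    /* Tear down only the ranges owned by 'gpu', as the new ownership check
     * does; ranges belonging to other GPUs must survive the unregister. */
    static void remove_gpu_ranges(p2p_range_t *ranges, size_t count, int gpu)
    {
        for (size_t i = 0; i < count; i++) {
            if (ranges[i].live && ranges[i].owner_gpu == gpu)
                ranges[i].live = 0;  /* unmap + deinit in the real code */
        }
    }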
@@ -1172,9 +1176,10 @@ void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
         va_range_unregister_gpu_semaphore_pool(uvm_va_range_to_semaphore_pool(va_range), gpu);
         break;
     case UVM_VA_RANGE_TYPE_DEVICE_P2P:
-        // All ranges should have been deinited by GPU VA space unregister,
-        // which should have already happened.
-        UVM_ASSERT(!uvm_va_range_to_device_p2p(va_range)->p2p_mem);
+        // All ranges for this GPU should have been deinited by GPU VA space
+        // unregister, which should have already happened.
+        if (uvm_va_range_to_device_p2p(va_range)->p2p_mem != NULL)
+            UVM_ASSERT(uvm_va_range_to_device_p2p(va_range)->gpu != gpu);
+        break;
     default:
         UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
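This hunk relaxes the matching assertion: since P2P ranges for other GPUs now legitimately outlive this unregister, the check becomes "any surviving p2p_mem must belong to a different GPU" rather than "no p2p_mem survives". The relaxed invariant, as a small sketch with illustrative types (not the UVM structs):

    #include <assert.h>
    #include <stddef.h>

    typedef struct {
        int owner_gpu;
        void *p2p_mem;
    } p2p_range_t;

    /* After unregistering 'gpu', a range may still hold p2p_mem, but only
     * if it is owned by some other GPU. */
    static void assert_unregistered(const p2p_range_t *r, int gpu)
    {
        if (r->p2p_mem != NULL)
            assert(r->owner_gpu != gpu);
    }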