530.30.02

commit 4397463e73
parent e598191e8e
Author: Andy Ritger
Date:   2023-02-28 11:12:44 -08:00

928 changed files with 124728 additions and 88525 deletions


@@ -35,26 +35,95 @@
 #include "uvm_linux_ioctl.h"
 #include "uvm_hmm.h"
 #include "uvm_mem.h"
+#include "uvm_kvmalloc.h"
 
 #define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
 
 static dev_t g_uvm_base_dev;
 static struct cdev g_uvm_cdev;
 static const struct file_operations uvm_fops;
 
-static int uvm_open(struct inode *inode, struct file *filp)
+bool uvm_file_is_nvidia_uvm(struct file *filp)
 {
-    NV_STATUS status = uvm_global_get_status();
-
-    if (status == NV_OK) {
-        if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
-            return -EAGAIN;
-
-        status = uvm_va_space_create(inode, filp);
-
-        uvm_up_read(&g_uvm_global.pm.lock);
-    }
-
-    return -nv_status_to_errno(status);
+    return (filp != NULL) && (filp->f_op == &uvm_fops);
+}
+
+uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
+{
+    unsigned long uptr;
+    uvm_fd_type_t type;
+    void *ptr;
+
+    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
+
+    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
+    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
+    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
+    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
+
+    switch (type) {
+        case UVM_FD_UNINITIALIZED:
+        case UVM_FD_INITIALIZING:
+            UVM_ASSERT(!ptr);
+            break;
+
+        case UVM_FD_VA_SPACE:
+            UVM_ASSERT(ptr);
+            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
+            break;
+
+        default:
+            UVM_ASSERT(0);
+    }
+
+    if (ptr_val)
+        *ptr_val = ptr;
+
+    return type;
 }
 
+// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
+// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
+// lock will need to be taken.
+static int uvm_open(struct inode *inode, struct file *filp)
+{
+    struct address_space *mapping;
+    NV_STATUS status = uvm_global_get_status();
+
+    if (status != NV_OK)
+        return -nv_status_to_errno(status);
+
+    mapping = uvm_kvmalloc(sizeof(*mapping));
+    if (!mapping)
+        return -ENOMEM;
+
+    // By default all struct files on the same inode share the same
+    // address_space structure (the inode's) across all processes. This means
+    // unmap_mapping_range would unmap virtual mappings across all processes on
+    // that inode.
+    //
+    // Since the UVM driver uses the mapping offset as the VA of the file's
+    // process, we need to isolate the mappings to each process.
+    address_space_init_once(mapping);
+    mapping->host = inode;
+
+    // Some paths in the kernel, for example force_page_cache_readahead which
+    // can be invoked from user-space via madvise MADV_WILLNEED and fadvise
+    // POSIX_FADV_WILLNEED, check the function pointers within
+    // file->f_mapping->a_ops for validity. However, those paths assume that a_ops
+    // itself is always valid. Handle that by using the inode's a_ops pointer,
+    // which is what f_mapping->a_ops would point to anyway if we weren't re-
+    // assigning f_mapping.
+    mapping->a_ops = inode->i_mapping->a_ops;
+
+#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
+    mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
+#endif
+
+    filp->private_data = NULL;
+    filp->f_mapping = mapping;
+
+    return NV_OK;
+}
+
 static int uvm_open_entry(struct inode *inode, struct file *filp)
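
Aside on the hunk above: uvm_fd_type() treats filp->private_data as a tagged pointer. Because a uvm_va_space_t is aligned to at least 1 << UVM_FD_TYPE_BITS bytes (the second BUILD_BUG_ON checks exactly that), the low pointer bits are free to carry the fd state (UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING, UVM_FD_VA_SPACE), and the acquire read pairs with the release stores made later in uvm_api_initialize(). Below is a minimal user-space sketch of the same packing idea; all names are hypothetical and none of this is code from the commit.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum fd_type { FD_UNINITIALIZED = 0, FD_INITIALIZING = 1, FD_VA_SPACE = 2 };

#define FD_TYPE_BITS 2UL
#define FD_TYPE_MASK ((1UL << FD_TYPE_BITS) - 1)

// Pack an object pointer and a small tag into one word. The object must be
// aligned to at least (1 << FD_TYPE_BITS) bytes so its low bits are zero.
static uintptr_t fd_pack(void *ptr, enum fd_type type)
{
    uintptr_t uptr = (uintptr_t)ptr;
    assert((uptr & FD_TYPE_MASK) == 0);
    return uptr | (uintptr_t)type;
}

// Split the word back into its tag and its pointer.
static enum fd_type fd_unpack(uintptr_t word, void **ptr_out)
{
    *ptr_out = (void *)(word & ~FD_TYPE_MASK);
    return (enum fd_type)(word & FD_TYPE_MASK);
}

int main(void)
{
    void *obj = aligned_alloc(1UL << FD_TYPE_BITS, 64); // stand-in for a va_space
    uintptr_t word = fd_pack(obj, FD_VA_SPACE);
    void *ptr;

    printf("type=%d, pointer survives round trip: %d\n",
           fd_unpack(word, &ptr), ptr == obj);
    free(obj);
    return 0;
}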
@@ -80,9 +149,18 @@ static void uvm_release_deferred(void *data)
 static int uvm_release(struct inode *inode, struct file *filp)
 {
-    uvm_va_space_t *va_space = uvm_va_space_get(filp);
+    uvm_va_space_t *va_space;
+    uvm_fd_type_t fd_type;
     int ret;
 
+    fd_type = uvm_fd_type(filp, (void **)&va_space);
+    UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
+    if (fd_type == UVM_FD_UNINITIALIZED) {
+        uvm_kvfree(filp->f_mapping);
+        return 0;
+    }
+
+    UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
+
     filp->private_data = NULL;
     filp->f_mapping = NULL;
@@ -100,7 +178,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
         // been destroyed, and va_space->mapping won't be used again. Still,
         // the va_space survives the inode if its destruction is deferred, in
         // which case the references are rendered stale.
-        address_space_init_once(&va_space->mapping);
+        address_space_init_once(va_space->mapping);
 
         nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
         ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
@@ -363,14 +441,12 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
 static void uvm_vm_close_managed(struct vm_area_struct *vma)
 {
     uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
-    uvm_gpu_t *gpu;
+    uvm_processor_id_t gpu_id;
     bool make_zombie = false;
 
     if (current->mm != NULL)
         uvm_record_lock_mmap_lock_write(current->mm);
 
-    UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
-
     // current->mm will be NULL on process teardown, in which case we have
     // special handling.
     if (current->mm == NULL) {
@@ -400,13 +476,11 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
     uvm_destroy_vma_managed(vma, make_zombie);
 
-    // Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
-    // that can be attributed to new VA ranges reallocated at the same address
-    for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
-        uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
-        UVM_ASSERT(gpu_va_space);
-
-        gpu_va_space->needs_fault_buffer_flush = true;
+    // Notify GPU address spaces that the fault buffer needs to be flushed to
+    // avoid finding stale entries that can be attributed to new VA ranges
+    // reallocated at the same address.
+    for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
+        uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
     }
 
     uvm_va_space_up_write(va_space);
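
Aside: the rewritten loop no longer looks up each registered GPU VA space and writes a per-object flag; it only sets a bit per GPU in va_space->needs_fault_buffer_flush, presumably to be consumed later by the fault servicing path. A rough user-space sketch of that defer-by-atomic-mask pattern, using C11 atomics and hypothetical names (not code from this commit):

#include <stdatomic.h>
#include <stdio.h>

#define MAX_GPUS 8

static atomic_ulong needs_fault_buffer_flush;

// VMA teardown path: just record which GPUs may now hold stale fault entries.
static void mark_gpu_needs_flush(unsigned gpu_id)
{
    atomic_fetch_or(&needs_fault_buffer_flush, 1UL << gpu_id);
}

// Fault servicing path: consume and clear the pending set in one atomic step.
static unsigned long take_pending_flushes(void)
{
    return atomic_exchange(&needs_fault_buffer_flush, 0UL);
}

int main(void)
{
    unsigned long pending;
    unsigned id;

    mark_gpu_needs_flush(1);
    mark_gpu_needs_flush(3);

    pending = take_pending_flushes();
    for (id = 0; id < MAX_GPUS; id++) {
        if (pending & (1UL << id))
            printf("flush fault buffer for GPU %u\n", id);
    }
    return 0;
}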
@@ -556,7 +630,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
 static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-    uvm_va_space_t *va_space = uvm_va_space_get(filp);
+    uvm_va_space_t *va_space;
     uvm_va_range_t *va_range;
     NV_STATUS status = uvm_global_get_status();
     int ret = 0;
@@ -565,8 +639,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
     if (status != NV_OK)
         return -nv_status_to_errno(status);
 
-    status = uvm_va_space_initialized(va_space);
-    if (status != NV_OK)
+    va_space = uvm_fd_va_space(filp);
+    if (!va_space)
         return -EBADFD;
 
     // When the VA space is associated with an mm, all vmas under the VA space
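
Aside: uvm_mmap() now rejects a file descriptor that has not completed UVM_INITIALIZE. The uvm_fd_va_space() helper is not shown in this diff; it presumably wraps uvm_fd_type() and returns the va_space only when the fd has reached the UVM_FD_VA_SPACE state, roughly along these lines (a sketch, not text from this commit):

uvm_va_space_t *uvm_fd_va_space(struct file *filp)
{
    uvm_va_space_t *va_space;

    // NULL unless UVM_INITIALIZE has published a va_space on this fd.
    if (uvm_fd_type(filp, (void **)&va_space) != UVM_FD_VA_SPACE)
        return NULL;

    return va_space;
}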
@@ -618,7 +692,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
     // Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
     // so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
     // with VM_IO, but that causes other mapping issues.
-    vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
+    // Make the default behavior be VM_DONTCOPY to avoid the performance impact
+    // of removing CPU mappings in the parent on fork()+exec(). Users can call
+    // madvise(MADV_DOFORK) if the child process requires access to the
+    // allocation.
+    vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY;
 
     vma->vm_ops = &uvm_vm_ops_managed;
@@ -685,7 +763,53 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
 static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
 {
-    return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
+    uvm_va_space_t *va_space;
+    NV_STATUS status;
+    uvm_fd_type_t old_fd_type;
+
+    // Normally we expect private_data == UVM_FD_UNINITIALIZED. However multiple
+    // threads may call this ioctl concurrently so we have to be careful to
+    // avoid initializing multiple va_spaces and/or leaking memory. To do this
+    // we do an atomic compare and swap. Only one thread will observe
+    // UVM_FD_UNINITIALIZED and that thread will allocate and setup the
+    // va_space.
+    //
+    // Other threads will either see UVM_FD_INITIALIZING or UVM_FD_VA_SPACE. In
+    // the case of UVM_FD_VA_SPACE we return success if and only if the
+    // initialization flags match. If another thread is still initializing the
+    // va_space we return NV_ERR_BUSY_RETRY.
+    //
+    // If va_space initialization fails we return the failure code and reset the
+    // FD state back to UVM_FD_UNINITIALIZED to allow another initialization
+    // attempt to be made. This is safe because other threads will have only had
+    // a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
+    // case.
+    old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
+                                          UVM_FD_UNINITIALIZED,
+                                          UVM_FD_INITIALIZING);
+    old_fd_type &= UVM_FD_TYPE_MASK;
+    if (old_fd_type == UVM_FD_UNINITIALIZED) {
+        status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
+        if (status != NV_OK) {
+            atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
+            return status;
+        }
+
+        atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
+    }
+    else if (old_fd_type == UVM_FD_VA_SPACE) {
+        va_space = uvm_va_space_get(filp);
+
+        if (params->flags != va_space->initialization_flags)
+            status = NV_ERR_INVALID_ARGUMENT;
+        else
+            status = NV_OK;
+    }
+    else {
+        UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
+        status = NV_ERR_BUSY_RETRY;
+    }
+
+    return status;
 }
 
 static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
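
Aside: to make the uvm_api_initialize() protocol concrete, here is a stripped-down user-space sketch of the same first-caller-wins state machine using C11 atomics. The state names mirror the UVM_FD_* values, but everything below is illustrative and not part of this commit (the driver additionally compares params->flags against the existing va_space when the fd is already initialized).

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum { STATE_UNINITIALIZED = 0, STATE_INITIALIZING = 1, STATE_READY = 2 };
#define STATE_MASK 3UL

static atomic_ulong fd_word;  // low bits: state tag, high bits: object pointer

static int fd_initialize(void)
{
    unsigned long expected = STATE_UNINITIALIZED;
    void *obj;

    // Only one thread wins the UNINITIALIZED -> INITIALIZING transition.
    if (atomic_compare_exchange_strong(&fd_word, &expected, STATE_INITIALIZING)) {
        obj = aligned_alloc(1UL << 2, 64);  // stand-in for the va_space
        if (!obj) {
            // Roll back so a later caller can retry the initialization.
            atomic_store_explicit(&fd_word, STATE_UNINITIALIZED, memory_order_release);
            return -1;
        }

        // Publish the object and the READY tag in a single release store,
        // pairing with the acquire read on the lookup side.
        atomic_store_explicit(&fd_word, (unsigned long)(uintptr_t)obj | STATE_READY,
                              memory_order_release);
        return 0;
    }

    // The CAS failed, so "expected" now holds the value another thread stored.
    switch (expected & STATE_MASK) {
        case STATE_READY:        return 0;   // already initialized
        case STATE_INITIALIZING: return -2;  // busy, the caller should retry
        default:                 return -1;
    }
}

int main(void)
{
    printf("first call:  %d\n", fd_initialize());
    printf("second call: %d\n", fd_initialize());
    return 0;
}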
@@ -782,11 +906,6 @@ static const struct file_operations uvm_fops =
     .owner = THIS_MODULE,
 };
 
-bool uvm_file_is_nvidia_uvm(struct file *filp)
-{
-    return (filp != NULL) && (filp->f_op == &uvm_fops);
-}
-
 NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
 {
     long ret;