mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-07-04 14:26:57 +00:00
272 lines
8.8 KiB
C
272 lines
8.8 KiB
C
/*******************************************************************************
|
|
Copyright (c) 2015-2026 NVIDIA Corporation
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to
|
|
deal in the Software without restriction, including without limitation the
|
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
sell copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
*******************************************************************************/
|
|
|
|
#ifndef __UVM_DEVMEM_H__
|
|
#define __UVM_DEVMEM_H__
|
|
|
|
#include "uvm_forward_decl.h"
|
|
#include "uvm_linux.h"
|
|
#include "uvm_types.h"
|
|
#include "uvm_va_space.h"
|
|
|
|
#include <linux/memremap.h>
|
|
|
|
#if defined(CONFIG_PCI_P2PDMA)
|
|
typedef struct
|
|
{
|
|
// For g_uvm_global.pci_p2pdma_devices
|
|
struct list_head list_node;
|
|
|
|
// A reference to the pci device associated with parent gpu
|
|
struct pci_dev *pdev;
|
|
|
|
// PCI attributes to lookup pci_dev during exit. On cleanup, this will be used to find/get a
|
|
// reference on pci_dev
|
|
unsigned int domain;
|
|
unsigned int bus;
|
|
unsigned int func;
|
|
|
|
// Starting pfn for the device, used to detect if a pgmap is already present
|
|
unsigned long dev_start_pfn;
|
|
} uvm_pmm_gpu_pci_dev_list_t;
|
|
|
|
#endif
|
|
void uvm_devmem_pci_p2pdma_cache_exit(void);
|
|
|
|
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
|
|
struct uvm_pmm_gpu_devmem_struct
|
|
{
|
|
// For g_uvm_global.devmem_ranges
|
|
struct list_head list_node;
|
|
|
|
// Size that was requested when created this region. This may be less than
|
|
// the size actually allocated by the kernel due to alignment contraints.
|
|
// Figuring out the required alignment at compile time is difficult due to
|
|
// unexported macros, so just use the requested size as the search key.
|
|
unsigned long size;
|
|
|
|
struct dev_pagemap pagemap;
|
|
};
|
|
|
|
// Return the GPU chunk for a given device private struct page.
|
|
uvm_gpu_chunk_t *uvm_devmem_page_to_chunk(struct page *page);
|
|
|
|
#endif
|
|
|
|
#if UVM_IS_CONFIG_HMM()
|
|
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
|
|
|
|
// Return the va_space for a given device private struct page.
|
|
uvm_va_space_t *uvm_devmem_page_to_va_space(struct page *page);
|
|
|
|
// Return the GPU id for a given device private struct page.
|
|
uvm_gpu_id_t uvm_devmem_page_to_gpu_id(struct page *page);
|
|
|
|
// Return the PFN of the device private struct page for the given GPU chunk.
|
|
unsigned long uvm_devmem_get_gpu_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
#endif
|
|
|
|
static inline NV_STATUS uvm_devmem_get_page_ref_if_device_coherent(struct page *page)
|
|
{
|
|
#if UVM_CDMM_PAGES_SUPPORTED()
|
|
// RM doesn't use DEVICE_COHERENT pages and therefore won't already hold
|
|
// a reference to them, so take one now if using DEVICE_COHERENT pages.
|
|
if (is_device_coherent_page(page)) {
|
|
if (!page_ref_count(page)) {
|
|
set_page_count(page, 1);
|
|
NV_GET_DEV_PAGEMAP(page_to_pfn(page));
|
|
} else {
|
|
UVM_ASSERT(0);
|
|
// Shared mappings are not supported with managed memory
|
|
return NV_ERR_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
#endif
|
|
return NV_OK;
|
|
}
|
|
|
|
// Release a page reference on a DEVICE_COHERENT page. This is a no-op for any
// other kind of page, and when UVM_CDMM_PAGES_SUPPORTED() is false.
static inline void uvm_devmem_put_page_ref_if_device_coherent(struct page *page)
{
#if UVM_CDMM_PAGES_SUPPORTED()
    if (!is_device_coherent_page(page))
        return;

    // This is the reference which was taken before vm_insert_page()
    put_page(page);
#endif
}
|
|
|
|
static inline bool uvm_is_device_page(struct page *page)
|
|
{
|
|
#if UVM_CDMM_PAGES_SUPPORTED()
|
|
return is_device_private_page(page) || is_device_coherent_page(page);
|
|
#else
|
|
return is_device_private_page(page);
|
|
#endif
|
|
}
|
|
|
|
// Returns true only if CDMM is enabled on the parent GPU, CDMM pages are
// supported by this build, and the parent GPU has a devmem attached.
static inline bool uvm_devmem_cdmm_present(uvm_parent_gpu_t *parent_gpu)
{
    bool present = false;

    if (parent_gpu->cdmm_enabled) {
#if UVM_CDMM_PAGES_SUPPORTED()
        present = (parent_gpu->devmem != NULL);
#endif
    }

    return present;
}
|
|
|
|
// Allocate and initialise struct page data in the kernel to support HMM.
|
|
NV_STATUS uvm_devmem_init(uvm_parent_gpu_t *gpu);
|
|
void uvm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
|
|
|
|
void uvm_devmem_device_p2p_init(uvm_parent_gpu_t *gpu);
|
|
void uvm_devmem_device_p2p_deinit(uvm_parent_gpu_t *gpu);
|
|
|
|
// Free unused ZONE_DEVICE pages.
|
|
void uvm_devmem_exit(void);
|
|
|
|
bool uvm_devmem_check_orphan_pages(uvm_pmm_gpu_t *pmm);
|
|
|
|
// This represents a physical RM allocation used for device peer-to-peer access.
|
|
// This is distinct from the va_range type because the physical allocations can be
|
|
// shared by multiple va_ranges.
|
|
typedef struct
|
|
{
|
|
// The physical GPU memory backing device P2P ranges can be referenced by
|
|
// two entities - kernel device drivers and the va_range(s) that UVM uses
|
|
// to create CPU mappings. The physical memory can not be freed until after
|
|
// both entities have finished accessing it. This refcount tracks total
|
|
// kernel users of the memory.
|
|
nv_kref_t refcount;
|
|
|
|
// The number of va ranges referencing this physical range.
|
|
nv_kref_t va_range_count;
|
|
|
|
uvm_gpu_t *gpu;
|
|
NvHandle rm_memory_handle;
|
|
NvU64 *pfns;
|
|
NvU64 pfn_count;
|
|
NvLength page_size;
|
|
uvm_deferred_free_object_t deferred_free;
|
|
struct list_head *deferred_free_list;
|
|
wait_queue_head_t waitq;
|
|
} uvm_device_p2p_mem_t;
|
|
|
|
// Assigned to page->zone_device_data for DEVICE_COHERENT pages where these
|
|
// could be used to track a device p2p allocation made with cudaMalloc() or
|
|
// system allocated memory in a va_block.
|
|
typedef struct {
|
|
uvm_gpu_chunk_t *gpu_chunk;
|
|
uvm_device_p2p_mem_t *p2p_mem;
|
|
} uvm_coherent_devmem_page_t;
|
|
|
|
NV_STATUS uvm_devmem_global_init(void);
|
|
void uvm_devmem_global_deinit(void);
|
|
|
|
#if (defined(CONFIG_PCI_P2PDMA) || UVM_CDMM_PAGES_SUPPORTED()) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
|
|
extern struct kmem_cache *g_uvm_coherent_devmem_page_cache;
|
|
|
|
// page->zone_device_data does not exist in kernel versions older than v5.3
// which don't support CONFIG_PCI_P2PDMA. Therefore we need these accessor
// functions to ensure compilation succeeds on older kernels.
|
|
// Record the device P2P allocation associated with a ZONE_DEVICE page.
//
// For DEVICE_COHERENT pages (only when UVM_CDMM_PAGES_SUPPORTED()),
// page->zone_device_data points to a uvm_coherent_devmem_page_t. That
// structure is allocated zeroed from g_uvm_coherent_devmem_page_cache the
// first time it is needed and reused afterwards. Only its p2p_mem field is
// written here, so a gpu_chunk already stored in it is left untouched.
//
// For every other page, page->zone_device_data holds p2p_mem directly.
//
// If the tracking structure can't be allocated, a warning is emitted and the
// function returns with page->zone_device_data still NULL.
static void page_set_zone_device_p2p_data(struct page *page, uvm_device_p2p_mem_t *p2p_mem)
{
#if UVM_CDMM_PAGES_SUPPORTED()
    if (is_device_coherent_page(page)) {
        // Start from whatever tracking structure is already attached to the
        // page so the existing-structure path never uses an unset pointer.
        uvm_coherent_devmem_page_t *coherent_devmem_page = page->zone_device_data;

        if (!coherent_devmem_page) {
            coherent_devmem_page = kmem_cache_zalloc(g_uvm_coherent_devmem_page_cache, NV_UVM_GFP_FLAGS);

            // Don't dereference a failed allocation. WARN_ON() evaluates to
            // its condition, so it can be tested directly.
            if (WARN_ON(!coherent_devmem_page))
                return;

            page->zone_device_data = coherent_devmem_page;
        }

        coherent_devmem_page->p2p_mem = p2p_mem;
    }
    else {
        page->zone_device_data = p2p_mem;
    }
#else
    page->zone_device_data = p2p_mem;
#endif
}
|
|
|
|
static uvm_device_p2p_mem_t *page_get_zone_device_p2p_data(struct page *page)
|
|
{
|
|
if (!page->zone_device_data)
|
|
return NULL;
|
|
#if UVM_CDMM_PAGES_SUPPORTED()
|
|
else if (is_device_coherent_page(page))
|
|
return ((uvm_coherent_devmem_page_t *) page->zone_device_data)->p2p_mem;
|
|
#endif
|
|
else
|
|
return page->zone_device_data;
|
|
}
|
|
|
|
// Record the GPU chunk associated with a ZONE_DEVICE page.
//
// For DEVICE_COHERENT pages (only when UVM_CDMM_PAGES_SUPPORTED()),
// page->zone_device_data points to a uvm_coherent_devmem_page_t. That
// structure is allocated zeroed from g_uvm_coherent_devmem_page_cache the
// first time it is needed and reused afterwards. Only its gpu_chunk field is
// written here, so a p2p_mem already stored in it is left untouched.
//
// For every other page, page->zone_device_data holds gpu_chunk directly.
//
// If the tracking structure can't be allocated, a warning is emitted and the
// function returns with page->zone_device_data still NULL.
static void page_set_zone_device_chunk_data(struct page *page, uvm_gpu_chunk_t *gpu_chunk)
{
#if UVM_CDMM_PAGES_SUPPORTED()
    if (is_device_coherent_page(page)) {
        // Start from whatever tracking structure is already attached to the
        // page so the existing-structure path never uses an unset pointer.
        uvm_coherent_devmem_page_t *coherent_devmem_page = page->zone_device_data;

        if (!coherent_devmem_page) {
            coherent_devmem_page = kmem_cache_zalloc(g_uvm_coherent_devmem_page_cache, NV_UVM_GFP_FLAGS);

            // Don't dereference a failed allocation. WARN_ON() evaluates to
            // its condition, so it can be tested directly.
            if (WARN_ON(!coherent_devmem_page))
                return;

            page->zone_device_data = coherent_devmem_page;
        }

        coherent_devmem_page->gpu_chunk = gpu_chunk;
    }
    else {
        page->zone_device_data = gpu_chunk;
    }
#else
    page->zone_device_data = gpu_chunk;
#endif
}
|
|
|
|
static uvm_gpu_chunk_t *page_get_zone_device_chunk_data(struct page *page)
|
|
{
|
|
if (!page->zone_device_data)
|
|
return NULL;
|
|
#if UVM_CDMM_PAGES_SUPPORTED()
|
|
else if (is_device_coherent_page(page))
|
|
return ((uvm_coherent_devmem_page_t *) page->zone_device_data)->gpu_chunk;
|
|
#endif
|
|
else
|
|
return page->zone_device_data;
|
|
}
|
|
#else
|
|
// Fallback used when the zone_device_data accessors above are compiled out.
// It stores nothing and only asserts, so it is not expected to be reached.
static void page_set_zone_device_p2p_data(struct page *page, uvm_device_p2p_mem_t *zone_device_data)
{
    UVM_ASSERT(0);
}
|
|
|
|
static uvm_device_p2p_mem_t *page_get_zone_device_p2p_data(struct page *page)
|
|
{
|
|
UVM_ASSERT(0);
|
|
return NULL;
|
|
}
|
|
#endif
|
|
#endif
|