Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-01-31 13:39:47 +00:00)
580.95.05
@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"580.82.09\"
ccflags-y += -DNV_VERSION_STRING=\"580.95.05\"
# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
@@ -32,6 +32,8 @@ typedef struct {
NvU8 bus, slot, function;
} pci_info;
NvBool needs_numa_setup;
/*
* opaque OS-specific pointer; on Linux, this is a pointer to the
* 'struct device' for the GPU.
@@ -1711,12 +1711,12 @@ static inline void nv_mutex_destroy(struct mutex *lock)
mutex_destroy(lock);
}
static inline NvBool nv_platform_supports_numa(nv_linux_state_t *nvl)
static inline NvBool nv_platform_supports_numa(const nv_linux_state_t *nvl)
{
return nvl->numa_info.node_id != NUMA_NO_NODE;
}
static inline int nv_get_numa_status(nv_linux_state_t *nvl)
static inline int nv_get_numa_status(const nv_linux_state_t *nvl)
{
if (!nv_platform_supports_numa(nvl))
{
@@ -588,8 +588,8 @@ typedef struct nv_state_t
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
/* Bool to check if power management is supported */
NvBool is_pm_supported;
/* Bool to check if power management is unsupported */
NvBool is_pm_unsupported;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@@ -685,6 +685,9 @@ typedef struct UvmGpuInfo_tag
// GPU supports Non-PASID ATS capability
NvBool nonPasidAtsSupport;
// GPU setup in CDMM mode
NvBool cdmmEnabled;
} UvmGpuInfo;
typedef struct UvmGpuFbInfo_tag
@@ -667,7 +667,9 @@ enum NvKmsInputTf {
enum NvKmsOutputColorimetry {
NVKMS_OUTPUT_COLORIMETRY_DEFAULT = 0,
NVKMS_OUTPUT_COLORIMETRY_BT2100 = 1,
NVKMS_OUTPUT_COLORIMETRY_BT601 = 1,
NVKMS_OUTPUT_COLORIMETRY_BT709 = 2,
NVKMS_OUTPUT_COLORIMETRY_BT2100 = 3,
};
enum NvKmsOutputTf {
@@ -4856,6 +4856,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;
memory_device_coherent_present)
#
# Determine if MEMORY_DEVICE_COHERENT support is present or not
#
# Added by commit f25cbb7a95a2 ("mm: add zone device coherent type
# memory support") in v6.0.
#
CODE="
#include <linux/mm.h>
int memory_device_coherent = MEMORY_DEVICE_COHERENT;
"
compile_check_conftest "$CODE" "NV_MEMORY_DEVICE_COHERENT_PRESENT" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist
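For context (not part of the change): conftest.sh compiles each CODE fragment against the target kernel headers and, when the build succeeds, emits the named define so driver sources can guard on it. A minimal sketch of such a guard, assuming the generated NV_MEMORY_DEVICE_COHERENT_PRESENT define and a hypothetical helper name:

    #include "conftest.h"          /* generated defines; include path is an assumption */
    #include <linux/types.h>
    #include <linux/memremap.h>

    /* Hypothetical helper: pick a ZONE_DEVICE page type for GPU memory. */
    static inline enum memory_type uvm_example_devmem_type(bool cdmm_enabled)
    {
    #if defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
        /* Kernel >= v6.0: driver-managed coherent memory can use DEVICE_COHERENT pages. */
        if (cdmm_enabled)
            return MEMORY_DEVICE_COHERENT;
    #endif
        /* Otherwise fall back to the device-private pages used for HMM. */
        return MEMORY_DEVICE_PRIVATE;
    }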
@@ -445,6 +445,14 @@ __nv_drm_connector_atomic_check(struct drm_connector *connector,
req_config->modeSetConfig.colorimetry =
NVKMS_OUTPUT_COLORIMETRY_DEFAULT;
break;
case DRM_MODE_COLORIMETRY_BT601_YCC:
req_config->modeSetConfig.colorimetry =
NVKMS_OUTPUT_COLORIMETRY_BT601;
break;
case DRM_MODE_COLORIMETRY_BT709_YCC:
req_config->modeSetConfig.colorimetry =
NVKMS_OUTPUT_COLORIMETRY_BT709;
break;
case DRM_MODE_COLORIMETRY_BT2020_RGB:
case DRM_MODE_COLORIMETRY_BT2020_YCC:
// Ignore RGB/YCC
@@ -691,7 +691,16 @@ static int nv_drm_dev_load(struct drm_device *dev)
pDevice = nvKms->allocateDevice(&allocateDeviceParams);
if (pDevice == NULL) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to allocate NvKmsKapiDevice");
if (nv_dev->gpu_info.needs_numa_setup) {
/*
* RM init from a kernel-mode driver may fail on GPUs that require
* NUMA setup. Just notify about that specifically rather than
* producing a scary-looking error.
*/
NV_DRM_DEV_LOG_INFO(nv_dev, "NUMA was not set up yet; ignoring this device");
} else {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to allocate NvKmsKapiDevice");
}
return -ENODEV;
}
@@ -787,6 +796,7 @@ static int nv_drm_dev_load(struct drm_device *dev)
}
#endif
nvKms->freeDevice(nv_dev->pDevice);
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to create DRM properties");
return -ENODEV;
}
@@ -1994,7 +2004,6 @@ void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
/* Load DRM device before registering it */
if (nv_drm_dev_load(dev) != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to load device");
goto failed_drm_load;
}
@@ -114,6 +114,10 @@ MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core cha
static int fail_alloc_core_channel_method = -1;
module_param_named(fail_alloc_core_channel, fail_alloc_core_channel_method, int, 0400);
MODULE_PARM_DESC(debug, "Enable debug logging");
static int debug = 0;
module_param_named(debug, debug, int, 0600);
#if NVKMS_CONFIG_FILE_SUPPORTED
/* This parameter is used to find the dpy override conf file */
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
@@ -190,6 +194,11 @@ NvBool nvkms_enable_overlay_layers(void)
return enable_overlay_layers;
}
NvBool nvkms_debug_logging(void)
{
return debug != 0;
}
NvBool nvkms_kernel_supports_syncpts(void)
{
/*
@@ -119,6 +119,7 @@ NvBool nvkms_vblank_sem_control(void);
NvBool nvkms_opportunistic_display_sync(void);
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
NvBool nvkms_enable_overlay_layers(void);
NvBool nvkms_debug_logging(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,
@@ -67,6 +67,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_device_coherent_present
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
@@ -143,7 +143,7 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
int residency;
bool cdmm_enabled = gpu->mem_info.cdmm_enabled;
bool cdmm_enabled = gpu->parent->cdmm_enabled;
if (gpu->parent->is_integrated_gpu || cdmm_enabled) {
residency = gpu->parent->closest_cpu_numa_node;
@@ -150,13 +150,8 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
gpu->mem_info.numa.enabled = true;
gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
gpu->mem_info.cdmm_enabled = false;
}
else {
// TODO: Bug 5273146: Use RM control call to detect CDMM mode.
if (uvm_parent_gpu_is_coherent(gpu->parent))
gpu->mem_info.cdmm_enabled = true;
gpu->mem_info.numa.node_id = NUMA_NO_NODE;
}
@@ -248,16 +243,15 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
return NV_OK;
}
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu)
int uvm_device_p2p_static_bar(uvm_parent_gpu_t *parent_gpu)
{
return nv_bar_index_to_os_bar_index(gpu->parent->pci_dev, NV_GPU_BAR_INDEX_FB);
return nv_bar_index_to_os_bar_index(parent_gpu->pci_dev, NV_GPU_BAR_INDEX_FB);
}
static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
{
NV_STATUS status;
UvmGpuFbInfo fb_info = {0};
unsigned long pci_bar1_addr = pci_resource_start(gpu->parent->pci_dev, uvm_device_p2p_static_bar(gpu));
status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(uvm_gpu_device_handle(gpu), &fb_info));
if (status != NV_OK)
@@ -270,9 +264,6 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
}
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
gpu->mem_info.static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
gpu->mem_info.static_bar1_size = fb_info.staticBar1Size;
gpu->mem_info.static_bar1_write_combined = fb_info.bStaticBar1WriteCombined;
return NV_OK;
}
@@ -1443,8 +1434,16 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
if (status != NV_OK)
return status;
if (!fb_info.bZeroFb)
if (!fb_info.bZeroFb) {
unsigned long pci_bar1_addr = pci_resource_start(parent_gpu->pci_dev, uvm_device_p2p_static_bar(parent_gpu));
parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;
parent_gpu->static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
parent_gpu->static_bar1_size = fb_info.staticBar1Size;
parent_gpu->static_bar1_write_combined = fb_info.bStaticBar1WriteCombined;
}
parent_gpu->cdmm_enabled = gpu_info->cdmmEnabled;
parent_gpu->virt_mode = gpu_info->virtMode;
if (parent_gpu->virt_mode == UVM_VIRT_MODE_LEGACY) {
@@ -1493,6 +1492,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
return status;
}
uvm_pmm_gpu_device_p2p_init(parent_gpu);
status = uvm_ats_add_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n",
@@ -1597,7 +1598,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
uvm_pmm_gpu_device_p2p_init(gpu);
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
status = init_semaphore_pools(gpu);
if (status != NV_OK) {
@@ -1731,6 +1732,8 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
// Return ownership to RM
uvm_parent_gpu_deinit_isr(parent_gpu);
uvm_pmm_gpu_device_p2p_deinit(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
@@ -1786,8 +1789,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
deinit_semaphore_pools(gpu);
uvm_pmm_gpu_device_p2p_deinit(gpu);
uvm_pmm_gpu_deinit(&gpu->pmm);
if (gpu->rm_address_space != 0)
@@ -711,21 +711,6 @@ struct uvm_gpu_struct
int node_id;
} numa;
// Coherent Driver-based Memory Management (CDMM) is a mode that allows
// coherent GPU memory to be managed by the driver and not the OS. This
// is done by the driver not onlining the memory as NUMA nodes. Having
// the field provides the most flexibility and is sync with the numa
// properties above. CDMM as a property applies to the entire system.
bool cdmm_enabled;
// Physical address of the start of statically mapped fb memory in BAR1
NvU64 static_bar1_start;
// Size of statically mapped fb memory in BAR1.
NvU64 static_bar1_size;
// Whether or not RM has iomapped the region write combined.
NvBool static_bar1_write_combined;
} mem_info;
struct
@@ -941,9 +926,6 @@ struct uvm_gpu_struct
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
bool uvm_test_force_upper_pushbuffer_segment;
// Have we initialised device p2p pages.
bool device_p2p_initialised;
// Used to protect allocation of p2p_mem and assignment of the page
// zone_device_data fields.
uvm_mutex_t device_p2p_lock;
@@ -1014,10 +996,28 @@ struct uvm_parent_gpu_struct
// Total amount of physical memory available on the parent GPU.
NvU64 max_allocatable_address;
#if UVM_IS_CONFIG_HMM()
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
uvm_pmm_gpu_devmem_t *devmem;
#endif
// Physical address of the start of statically mapped fb memory in BAR1
NvU64 static_bar1_start;
// Size of statically mapped fb memory in BAR1.
NvU64 static_bar1_size;
// Whether or not RM has iomapped the region write combined.
NvBool static_bar1_write_combined;
// Have we initialised device p2p pages.
bool device_p2p_initialised;
// Coherent Driver-based Memory Management (CDMM) is a mode that allows
// coherent GPU memory to be managed by the driver and not the OS. This
// is done by the driver not onlining the memory as NUMA nodes. CDMM as a
// property applies to the entire system.
bool cdmm_enabled;
// The physical address range addressable by the GPU
//
// The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
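The struct hunks above move cdmm_enabled, the static BAR1 fields, and device_p2p_initialised from uvm_gpu_t (mem_info) to uvm_parent_gpu_t, which is why later hunks rewrite call sites from gpu->mem_info.cdmm_enabled to gpu->parent->cdmm_enabled. A hypothetical accessor, not part of the change, that captures the new layout:

    /* Hypothetical convenience wrapper; CDMM is a system-wide, per-parent-GPU property. */
    static inline bool uvm_gpu_cdmm_enabled(const uvm_gpu_t *gpu)
    {
        return gpu->parent->cdmm_enabled;
    }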
@@ -1867,6 +1867,6 @@ typedef enum
} uvm_gpu_buffer_flush_mode_t;
// PCIe BAR containing static framebuffer memory mappings for PCIe P2P
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu);
int uvm_device_p2p_static_bar(uvm_parent_gpu_t *gpu);
#endif // __UVM_GPU_H__
@@ -1576,7 +1576,7 @@ static NV_STATUS service_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
}
}
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
if (!gpu_va_space->gpu->mem_info.cdmm_enabled) {
if (!gpu_va_space->gpu->parent->cdmm_enabled) {
status = service_notification_ats(gpu_va_space, mm, access_counters, index, out_index);
}
else {
@@ -85,6 +85,12 @@
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#if defined(CONFIG_ZONE_DEVICE) && defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
#define UVM_CDMM_PAGES_SUPPORTED() 1
#else
#define UVM_CDMM_PAGES_SUPPORTED() 0
#endif
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_DEVICE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else
@@ -1020,7 +1020,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
.fail_on_unresolved_sto_errors = false,
};
if (dest_gpu && dest_gpu->mem_info.cdmm_enabled) {
if (dest_gpu && dest_gpu->parent->cdmm_enabled) {
uvm_migrate_args.dst_id = UVM_ID_CPU;
uvm_migrate_args.dst_node_id = dest_gpu->parent->closest_cpu_numa_node;
uvm_migrate_args.populate_on_cpu_alloc_failures = true;
@@ -295,7 +295,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
{
// CDMM implies there are no struct pages corresponding to the
// GPU memory physical address.
if (gpu->mem_info.cdmm_enabled) {
if (gpu->parent->cdmm_enabled) {
NvU64 addr = uvm_gpu_chunk_to_sys_addr(&gpu->pmm, phys_alloc->handle.chunk);
// Using cached access for coherent systems, there should be no conflicts
// for the vidmem region
@@ -312,7 +312,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
static void uvm_mmu_page_table_cpu_unmap(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc_t *phys_alloc, void *ptr)
{
if (gpu->mem_info.cdmm_enabled)
if (gpu->parent->cdmm_enabled)
nv_iounmap(ptr, PAGE_SIZE);
else
kunmap(uvm_mmu_page_table_page(gpu, phys_alloc));
@@ -3246,11 +3246,117 @@ err:
return NULL;
}
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
unsigned long devmem_start = gpu->parent->devmem->pagemap.range.start;
return (devmem_start + chunk->address) >> PAGE_SHIFT;
}
#else // UVM_IS_CONFIG_HMM()
static void *devmem_alloc_pagemap(unsigned long size) { return NULL; }
static void *devmem_reuse_pagemap(unsigned long size) { return NULL; }
#endif // UVM_IS_CONFIG_HMM()
#if (UVM_CDMM_PAGES_SUPPORTED() || defined(CONFIG_PCI_P2PDMA)) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
static void device_p2p_page_free_wake(struct nv_kref *ref)
{
uvm_device_p2p_mem_t *p2p_mem = container_of(ref, uvm_device_p2p_mem_t, refcount);
wake_up(&p2p_mem->waitq);
}
static void device_p2p_page_free(struct page *page)
{
uvm_device_p2p_mem_t *p2p_mem = page->zone_device_data;
page->zone_device_data = NULL;
nv_kref_put(&p2p_mem->refcount, device_p2p_page_free_wake);
}
#endif
#if UVM_CDMM_PAGES_SUPPORTED()
static void device_coherent_page_free(struct page *page)
{
device_p2p_page_free(page);
}
static const struct dev_pagemap_ops uvm_device_coherent_pgmap_ops =
{
.page_free = device_coherent_page_free,
};
static NV_STATUS uvm_pmm_cdmm_init(uvm_parent_gpu_t *parent_gpu)
{
uvm_pmm_gpu_devmem_t *devmem;
void *ptr;
NV_STATUS status;
UVM_ASSERT(!uvm_hmm_is_enabled_system_wide());
list_for_each_entry(devmem, &g_uvm_global.devmem_ranges.list, list_node) {
if (devmem->pagemap.range.start == parent_gpu->system_bus.memory_window_start) {
UVM_ASSERT(devmem->pagemap.type == MEMORY_DEVICE_COHERENT);
UVM_ASSERT(devmem->pagemap.range.end ==
SUBSECTION_ALIGN_UP(parent_gpu->system_bus.memory_window_end >> PAGE_SHIFT) << PAGE_SHIFT);
list_del(&devmem->list_node);
parent_gpu->devmem = devmem;
parent_gpu->device_p2p_initialised = true;
return NV_OK;
}
}
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
goto err;
devmem->size = parent_gpu->system_bus.memory_window_end - parent_gpu->system_bus.memory_window_start;
devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
devmem->pagemap.range.start = parent_gpu->system_bus.memory_window_start;
devmem->pagemap.range.end = SUBSECTION_ALIGN_UP(parent_gpu->system_bus.memory_window_end >> PAGE_SHIFT) << PAGE_SHIFT;
devmem->pagemap.nr_range = 1;
devmem->pagemap.ops = &uvm_device_coherent_pgmap_ops;
devmem->pagemap.owner = &g_uvm_global;
// Numa node ID doesn't matter for ZONE_DEVICE coherent pages.
ptr = memremap_pages(&devmem->pagemap, NUMA_NO_NODE);
if (IS_ERR(ptr)) {
UVM_ERR_PRINT("memremap_pages() err %ld\n", PTR_ERR(ptr));
status = errno_to_nv_status(PTR_ERR(ptr));
goto err_free;
}
parent_gpu->devmem = devmem;
parent_gpu->device_p2p_initialised = true;
return NV_OK;
err_free:
kfree(devmem);
err:
return NV_ERR_NOT_SUPPORTED;
}
static void uvm_pmm_cdmm_deinit(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->device_p2p_initialised = false;
list_add_tail(&parent_gpu->devmem->list_node, &g_uvm_global.devmem_ranges.list);
parent_gpu->devmem = NULL;
}
#else // UVM_CDMM_PAGES_SUPPORTED
static NV_STATUS uvm_pmm_cdmm_init(uvm_parent_gpu_t *parent_gpu) { return NV_OK; }
static void uvm_pmm_cdmm_deinit(uvm_parent_gpu_t *parent_gpu) {}
#endif // UVM_CDMM_PAGES_SUPPORTED
#if UVM_IS_CONFIG_HMM() || UVM_CDMM_PAGES_SUPPORTED()
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
{
// Create a DEVICE_PRIVATE page for every GPU page available on the parent.
unsigned long size = gpu->max_allocatable_address;
if (gpu->cdmm_enabled)
return uvm_pmm_cdmm_init(gpu);
if (!uvm_hmm_is_enabled_system_wide()) {
gpu->devmem = NULL;
return NV_OK;
@@ -3268,6 +3374,11 @@ NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
{
if (gpu->cdmm_enabled && gpu->devmem) {
uvm_pmm_cdmm_deinit(gpu);
return;
}
if (!gpu->devmem)
return;
@@ -3282,31 +3393,18 @@ void uvm_pmm_devmem_exit(void)
list_for_each_entry_safe(devmem, devmem_next, &g_uvm_global.devmem_ranges.list, list_node) {
list_del(&devmem->list_node);
memunmap_pages(&devmem->pagemap);
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
kfree(devmem);
}
}
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
unsigned long devmem_start = gpu->parent->devmem->pagemap.range.start;
return (devmem_start + chunk->address) >> PAGE_SHIFT;
}
#endif // UVM_IS_CONFIG_HMM()
#else
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu) { return NV_OK; }
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu) {}
void uvm_pmm_devmem_exit(void) {}
#endif
#if !UVM_IS_CONFIG_HMM()
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
{
return NV_OK;
}
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
{
}
static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
{
return true;
@@ -3318,41 +3416,32 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
// TODO: Bug 5303506: ARM64: P2PDMA pages cannot be accessed from the CPU on
// ARM
#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA) && !defined(NVCPU_AARCH64)
static void device_p2p_page_free_wake(struct nv_kref *ref)
{
uvm_device_p2p_mem_t *p2p_mem = container_of(ref, uvm_device_p2p_mem_t, refcount);
wake_up(&p2p_mem->waitq);
}
static void device_p2p_page_free(struct page *page)
{
uvm_device_p2p_mem_t *p2p_mem = page->zone_device_data;
page->zone_device_data = NULL;
nv_kref_put(&p2p_mem->refcount, device_p2p_page_free_wake);
}
static const struct dev_pagemap_ops uvm_device_p2p_pgmap_ops =
{
.page_free = device_p2p_page_free,
};
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)
{
unsigned long pci_start_pfn = pci_resource_start(gpu->parent->pci_dev,
uvm_device_p2p_static_bar(gpu)) >> PAGE_SHIFT;
unsigned long pci_end_pfn = pci_start_pfn + (gpu->mem_info.static_bar1_size >> PAGE_SHIFT);
unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
unsigned long pci_end_pfn = pci_start_pfn + (parent_gpu->static_bar1_size >> PAGE_SHIFT);
struct page *p2p_page;
gpu->device_p2p_initialised = false;
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
if (uvm_parent_gpu_is_coherent(parent_gpu)) {
// P2PDMA support with CDMM enabled requires special
// MEMORY_DEVICE_COHERENT pages to have been allocated which will have
// also set the p2p initialised state if successful.
if (parent_gpu->cdmm_enabled)
return;
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
// A coherent system uses normal struct pages.
gpu->device_p2p_initialised = true;
parent_gpu->device_p2p_initialised = true;
return;
}
parent_gpu->device_p2p_initialised = false;
// RM sets static_bar1_size when it has created a contiguous BAR mapping
// large enough to cover all of GPU memory that will be allocated to
// userspace buffers. This is required to support the P2PDMA feature to
@@ -3364,10 +3453,10 @@ void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
// can be disabled by setting the RmForceDisableIomapWC regkey which allows
// creation of the P2PDMA pages.
// TODO: Bug 5044562: P2PDMA pages require the PCIe BAR to be mapped UC
if (!gpu->mem_info.static_bar1_size || gpu->mem_info.static_bar1_write_combined)
if (!parent_gpu->static_bar1_size || parent_gpu->static_bar1_write_combined)
return;
if (pci_p2pdma_add_resource(gpu->parent->pci_dev, uvm_device_p2p_static_bar(gpu), 0, 0)) {
if (pci_p2pdma_add_resource(parent_gpu->pci_dev, uvm_device_p2p_static_bar(parent_gpu), 0, 0)) {
UVM_ERR_PRINT("Unable to initialse PCI P2PDMA pages\n");
return;
}
@@ -3383,46 +3472,40 @@ void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
for (; page_to_pfn(p2p_page) < pci_end_pfn; p2p_page++)
p2p_page->zone_device_data = NULL;
gpu->device_p2p_initialised = true;
parent_gpu->device_p2p_initialised = true;
}
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
unsigned long pci_start_pfn = pci_resource_start(gpu->parent->pci_dev,
uvm_device_p2p_static_bar(gpu)) >> PAGE_SHIFT;
unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
struct page *p2p_page;
if (gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(gpu->parent)) {
if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) {
p2p_page = pfn_to_page(pci_start_pfn);
devm_memunmap_pages(&gpu->parent->pci_dev->dev, page_pgmap(p2p_page));
devm_memunmap_pages(&parent_gpu->pci_dev->dev, page_pgmap(p2p_page));
}
gpu->device_p2p_initialised = false;
parent_gpu->device_p2p_initialised = false;
}
#else // CONFIG_PCI_P2PDMA
// Coherent platforms can do P2PDMA without CONFIG_PCI_P2PDMA
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)
{
gpu->device_p2p_initialised = false;
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
// CDMM implies that there are no struct pages corresponding to
// the GPU memory. P2PDMA struct pages which are required for
// device P2P mappings are not currently supported on ARM.
if (gpu->mem_info.cdmm_enabled)
if (uvm_parent_gpu_is_coherent(parent_gpu)) {
if (parent_gpu->cdmm_enabled)
return;
// A coherent system uses normal struct pages.
gpu->device_p2p_initialised = true;
parent_gpu->device_p2p_initialised = true;
return;
}
}
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
gpu->device_p2p_initialised = false;
parent_gpu->device_p2p_initialised = false;
}
#endif // CONFIG_PCI_P2PDMA
@@ -190,8 +190,7 @@ typedef uvm_chunk_size_t uvm_chunk_sizes_mask_t;
typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;
#if UVM_IS_CONFIG_HMM()
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
typedef struct
{
// For g_uvm_global.devmem_ranges
@@ -205,7 +204,9 @@ typedef struct
struct dev_pagemap pagemap;
} uvm_pmm_gpu_devmem_t;
#endif
#if UVM_IS_CONFIG_HMM()
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
// Return the GPU chunk for a given device private struct page.
@@ -219,19 +220,18 @@ uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);
// Return the PFN of the device private struct page for the given GPU chunk.
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
#endif
// Allocate and initialise struct page data in the kernel to support HMM.
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *gpu);
// Free unused ZONE_DEVICE pages.
void uvm_pmm_devmem_exit(void);
#else
static inline void uvm_pmm_devmem_exit(void)
{
}
#endif
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu);
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu);
struct uvm_gpu_chunk_struct
{
// Physical address of GPU chunk. This may be removed to save memory
@@ -627,10 +627,6 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
// Allocate and initialise struct page data in the kernel to support HMM.
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes) \
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \
@@ -47,6 +47,11 @@ void uvm_va_range_device_p2p_exit(void)
kmem_cache_destroy_safe(&g_uvm_va_range_device_p2p_cache);
}
static bool device_p2p_uses_zone_device(uvm_parent_gpu_t *gpu)
{
return !uvm_parent_gpu_is_coherent(gpu) || gpu->cdmm_enabled;
}
static NvU64 p2p_mem_page_count(uvm_device_p2p_mem_t *p2p_mem)
{
return (p2p_mem->pfn_count * p2p_mem->page_size) >> PAGE_SHIFT;
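The helper above centralizes the backing-store decision that the later hunks apply at each call site. A sketch of the policy it encodes, derived from the comments in the surrounding hunks (the summary itself is not in the diff):

    /*
     * device_p2p_uses_zone_device(parent_gpu):
     *   - non-coherent (PCIe-attached) GPU -> true:  PCI P2PDMA ZONE_DEVICE pages back P2P mappings
     *   - coherent GPU, CDMM enabled       -> true:  MEMORY_DEVICE_COHERENT pages back P2P mappings
     *   - coherent GPU, CDMM disabled      -> false: normal struct pages; RM already holds the
     *                                                reference via the duplicated handle, so UVM
     *                                                neither takes nor releases one
     */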
@@ -74,9 +79,11 @@ void uvm_va_range_free_device_p2p_mem(uvm_device_p2p_mem_t *p2p_mem)
NvU64 i;
uvm_gpu_t *gpu = p2p_mem->gpu;
// In the coherent case we don't hold references on the page because RM does
// via the duplicated handle.
if (!uvm_parent_gpu_is_coherent(gpu->parent)) {
// If normal, non-zone-device pages are used for providing device p2p
// functionality RM will already hold a reference on the page via the
// duplicated handle. Therefore UVM skipped taking a reference on the page
// so don't need to return one.
if (device_p2p_uses_zone_device(gpu->parent)) {
uvm_mutex_lock(&gpu->device_p2p_lock);
// It's possible that another range has been setup for the handle since
@@ -128,13 +135,13 @@ static void deinit_device_p2p_mem(uvm_device_p2p_mem_t *p2p_mem, struct list_hea
// scheduling work which may not happen holding the va_space lock. Coherent
// systems don't need to take the lock because the p2p_mem objects are not
// shared between multiple va_ranges.
if (!uvm_parent_gpu_is_coherent(p2p_mem->gpu->parent))
if (device_p2p_uses_zone_device(p2p_mem->gpu->parent))
uvm_mutex_lock_nested(&p2p_mem->gpu->device_p2p_lock);
p2p_mem->deferred_free_list = deferred_free_list;
nv_kref_put(&p2p_mem->va_range_count, put_device_p2p_mem);
if (!uvm_parent_gpu_is_coherent(p2p_mem->gpu->parent))
if (device_p2p_uses_zone_device(p2p_mem->gpu->parent))
uvm_mutex_unlock_nested(&p2p_mem->gpu->device_p2p_lock);
}
@@ -225,6 +232,8 @@ static NV_STATUS get_gpu_pfns(uvm_gpu_t *gpu,
// start address or system memory start address and right shifting by
// PAGE_SHIFT.
for (i = 0; i < ext_mapping_info.numWrittenPhysAddrs; i++)
// MEMORY_DEVICE_COHERENT pages are in the system memory window so are
// the same as normal struct pages for the purposes of calculating pfn.
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
NvU64 last_pfn = gpu->parent->system_bus.memory_window_end >> PAGE_SHIFT;
@@ -234,9 +243,9 @@ static NV_STATUS get_gpu_pfns(uvm_gpu_t *gpu,
return NV_ERR_INVALID_ADDRESS;
}
else {
NvU64 last_pfn = ((gpu->mem_info.static_bar1_start + gpu->mem_info.static_bar1_size) >> PAGE_SHIFT) - 1;
NvU64 last_pfn = ((gpu->parent->static_bar1_start + gpu->parent->static_bar1_size) >> PAGE_SHIFT) - 1;
pfns[i] = (gpu->mem_info.static_bar1_start + pfns[i]) >> PAGE_SHIFT;
pfns[i] = (gpu->parent->static_bar1_start + pfns[i]) >> PAGE_SHIFT;
UVM_ASSERT(pfns[i] <= last_pfn);
if (pfns[i] > last_pfn)
return NV_ERR_INVALID_ADDRESS;
@@ -253,27 +262,26 @@ static bool pci_p2pdma_page_free(struct page *page) {
// page->zone_device_data does not exist in kernels versions older than v5.3
// which don't support CONFIG_PCI_P2PDMA. Therefore we need these accessor
// functions to ensure compilation succeeeds on older kernels.
static void pci_p2pdma_page_set_zone_device_data(struct page *page, void *zone_device_data)
static void page_set_zone_device_data(struct page *page, void *zone_device_data)
{
page->zone_device_data = zone_device_data;
}
static void *pci_p2pdma_page_get_zone_device_data(struct page *page)
static void *page_get_zone_device_data(struct page *page)
{
return page->zone_device_data;
}
#else
static bool pci_p2pdma_page_free(struct page *page) {
UVM_ASSERT(0);
return false;
}
static void pci_p2pdma_page_set_zone_device_data(struct page *page, void *zone_device_data)
static void page_set_zone_device_data(struct page *page, void *zone_device_data)
{
UVM_ASSERT(0);
}
static void *pci_p2pdma_page_get_zone_device_data(struct page *page)
static void *page_get_zone_device_data(struct page *page)
{
UVM_ASSERT(0);
return NULL;
@@ -335,7 +343,7 @@ static NV_STATUS alloc_device_p2p_mem(uvm_gpu_t *gpu,
for (i = 0; i < p2p_mem_page_count(p2p_mem); i++) {
struct page *page = p2p_mem_get_page(p2p_mem, i);
if (!pci_p2pdma_page_free(page)) {
if (!gpu->parent->cdmm_enabled && !pci_p2pdma_page_free(page)) {
UVM_ASSERT(0);
// This will leak the RM handle because we don't release it.
@@ -345,7 +353,23 @@ static NV_STATUS alloc_device_p2p_mem(uvm_gpu_t *gpu,
return NV_ERR_INVALID_ARGUMENT;
}
pci_p2pdma_page_set_zone_device_data(page, p2p_mem);
page_set_zone_device_data(page, p2p_mem);
#if UVM_CDMM_PAGES_SUPPORTED()
// RM doesn't use DEVICE_COHERENT pages and therefore won't already hold
// a reference to them, so take one now if using DEVICE_COHERENT pages.
if (gpu->parent->cdmm_enabled) {
get_page(page);
get_dev_pagemap(page_to_pfn(page), NULL);
}
#else
// CDMM P2PDMA will never be enabled for this case
if (gpu->parent->cdmm_enabled) {
UVM_ASSERT(0);
break;
}
#endif
nv_kref_get(&p2p_mem->refcount);
}
@@ -400,7 +424,7 @@ static NV_STATUS alloc_pci_device_p2p(uvm_gpu_t *gpu,
// also ensures if we don't find a p2p_mem object that we don't race with
// some other thread assigning or clearing zone_device_data.
uvm_mutex_lock(&gpu->device_p2p_lock);
p2p_mem = pci_p2pdma_page_get_zone_device_data(pfn_to_page(pfn));
p2p_mem = page_get_zone_device_data(pfn_to_page(pfn));
if (!p2p_mem) {
// We have not previously allocated p2pdma pages for this RM handle so do
// so now.
@@ -513,12 +537,12 @@ NV_STATUS uvm_api_alloc_device_p2p(UVM_ALLOC_DEVICE_P2P_PARAMS *params, struct f
if (!gpu)
return NV_ERR_INVALID_DEVICE;
if (!gpu->device_p2p_initialised) {
if (!gpu->parent->device_p2p_initialised) {
status = NV_ERR_NOT_SUPPORTED;
goto out_release;
}
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
if (uvm_parent_gpu_is_coherent(gpu->parent) && !device_p2p_uses_zone_device(gpu->parent)) {
status = alloc_coherent_device_p2p(gpu, params->hClient, params->hMemory, &p2p_mem);
if (status != NV_OK)
goto out_release;
@@ -940,7 +940,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
*numa_enabled = NV_TRUE;
*numa_node_id = (NvS32)uvm_gpu_numa_node(gpu);
}
else if (gpu->parent->is_integrated_gpu || gpu->mem_info.cdmm_enabled) {
else if (gpu->parent->is_integrated_gpu || gpu->parent->cdmm_enabled) {
*numa_enabled = NV_FALSE;
*numa_node_id = (NvS32)gpu->parent->closest_cpu_numa_node;
}
@@ -83,6 +83,7 @@ static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)
for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
{
nv_state_t *nv = NV_STATE_PTR(nvl);
int numa_status = nv_get_numa_status(nvl);
/*
* The gpu_info[] array has NV_MAX_GPUS elements. Fail if there
@@ -102,6 +103,10 @@ static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)
gpu_info[count].pci_info.slot = nv->pci_info.slot;
gpu_info[count].pci_info.function = nv->pci_info.function;
gpu_info->needs_numa_setup =
numa_status != NV_IOCTL_NUMA_STATUS_DISABLED &&
numa_status != NV_IOCTL_NUMA_STATUS_ONLINE;
gpu_info[count].os_device_ptr = nvl->dev;
count++;
@@ -2458,6 +2458,12 @@ nvidia_ioctl(
{
nv_ioctl_wait_open_complete_t *params = arg_copy;
if (arg_size != sizeof(nv_ioctl_wait_open_complete_t))
{
status = -EINVAL;
goto done_early;
}
params->rc = nvlfp->open_rc;
params->adapterStatus = nvlfp->adapter_status;
goto done_early;
@@ -2538,8 +2544,12 @@ nvidia_ioctl(
goto done;
}
/* atomically check and alloc attached_gpus */
down(&nvl->ldata_lock);
if (nvlfp->num_attached_gpus != 0)
{
up(&nvl->ldata_lock);
status = -EINVAL;
goto done;
}
@@ -2547,12 +2557,15 @@ nvidia_ioctl(
NV_KMALLOC(nvlfp->attached_gpus, arg_size);
if (nvlfp->attached_gpus == NULL)
{
up(&nvl->ldata_lock);
status = -ENOMEM;
goto done;
}
memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
nvlfp->num_attached_gpus = num_arg_gpus;
up(&nvl->ldata_lock);
for (i = 0; i < nvlfp->num_attached_gpus; i++)
{
if (nvlfp->attached_gpus[i] == 0)
@@ -2568,9 +2581,14 @@ nvidia_ioctl(
nvidia_dev_put(nvlfp->attached_gpus[i], sp);
}
/* atomically free attached_gpus */
down(&nvl->ldata_lock);
NV_KFREE(nvlfp->attached_gpus, arg_size);
nvlfp->num_attached_gpus = 0;
up(&nvl->ldata_lock);
status = -EINVAL;
break;
}
@@ -4504,18 +4522,18 @@ nvidia_suspend(
down(&nvl->ldata_lock);
if (!nv->is_pm_supported)
{
status = NV_ERR_NOT_SUPPORTED;
goto done;
}
if (((nv->flags & NV_FLAG_OPEN) == 0) &&
((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) == 0))
{
goto done;
}
if (nv->is_pm_unsupported)
{
status = NV_ERR_NOT_SUPPORTED;
goto done;
}
if ((nv->flags & NV_FLAG_SUSPENDED) != 0)
{
nvl->suspend_count++;