Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-02-07 00:29:58 +00:00)
570.124.04
@@ -612,6 +612,42 @@ nv_dma_buf_unmap_pfns(
     }
 }
 
+static NvU32
+nv_dma_buf_get_sg_count (
+    struct device *dev,
+    nv_dma_buf_file_private_t *priv,
+    NvU32 *max_seg_size
+)
+{
+    NvU32 dma_max_seg_size, i;
+    NvU32 nents = 0;
+
+    dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
+    if (dma_max_seg_size < PAGE_SIZE)
+    {
+        return 0;
+    }
+
+    // Calculate nents needed to allocate sg_table
+    for (i = 0; i < priv->num_objects; i++)
+    {
+        NvU32 range_count = priv->handles[i].memArea.numRanges;
+        NvU32 index;
+
+        for (index = 0; index < range_count; index++)
+        {
+            NvU64 length = priv->handles[i].memArea.pRanges[index].size;
+            NvU64 count = length + dma_max_seg_size - 1;
+            do_div(count, dma_max_seg_size);
+            nents += count;
+        }
+    }
+
+    *max_seg_size = dma_max_seg_size;
+
+    return nents;
+}
+
 static struct sg_table*
 nv_dma_buf_map_pages (
     struct device *dev,
@@ -620,15 +656,11 @@ nv_dma_buf_map_pages (
 {
     struct sg_table *sgt = NULL;
     struct scatterlist *sg;
-    NvU32 nents = 0;
-    NvU32 i;
+    NvU32 dma_max_seg_size = 0;
+    NvU32 i, nents;
     int rc;
 
-    // Calculate nents needed to allocate sg_table
-    for (i = 0; i < priv->num_objects; i++)
-    {
-        nents += priv->handles[i].memArea.numRanges;
-    }
+    nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
 
     NV_KZALLOC(sgt, sizeof(struct sg_table));
     if (sgt == NULL)
@@ -650,20 +682,30 @@ nv_dma_buf_map_pages (
         NvU32 index = 0;
         for (index = 0; index < range_count; index++)
         {
-            NvU64 addr = priv->handles[i].memArea.pRanges[index].start;
-            NvU64 len = priv->handles[i].memArea.pRanges[index].size;
-            struct page *page = NV_GET_PAGE_STRUCT(addr);
+            NvU64 dma_addr = priv->handles[i].memArea.pRanges[index].start;
+            NvU64 dma_len = priv->handles[i].memArea.pRanges[index].size;
 
-            if ((page == NULL) || (sg == NULL))
+            // Split each range into dma_max_seg_size chunks
+            while(dma_len != 0)
             {
-                goto free_table;
-            }
+                NvU32 sg_len = NV_MIN(dma_len, dma_max_seg_size);
+                struct page *page = NV_GET_PAGE_STRUCT(dma_addr);
 
-            sg_set_page(sg, page, len, NV_GET_OFFSET_IN_PAGE(addr));
-            sg = sg_next(sg);
+                if ((page == NULL) || (sg == NULL))
+                {
+                    goto free_table;
+                }
+
+                sg_set_page(sg, page, sg_len, NV_GET_OFFSET_IN_PAGE(dma_addr));
+                dma_addr += sg_len;
+                dma_len -= sg_len;
+                sg = sg_next(sg);
+            }
         }
     }
 
+    WARN_ON(sg != NULL);
+
     // DMA map the sg_table
     rc = dma_map_sg(dev, sgt->sgl, sgt->orig_nents, DMA_BIDIRECTIONAL);
     if (rc <= 0)
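Note: the hunks above pair a counting pass (nv_dma_buf_get_sg_count) with a splitting pass (the while loop), so the sg_table is allocated with exactly as many entries as the split consumes. Below is a minimal standalone sketch of that arithmetic in plain C with illustrative names -- not driver code, no kernel types.

/*
 * Standalone sketch: verifies that splitting a range into max_seg-sized
 * chunks consumes exactly the number of entries that the ceil-division
 * in the counting pass reserves for it.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t count_segments(uint64_t len, uint64_t max_seg)
{
    /* Same arithmetic as "count = len + max_seg - 1; do_div(count, max_seg);" */
    return (len + max_seg - 1) / max_seg;
}

static uint64_t split_segments(uint64_t len, uint64_t max_seg)
{
    uint64_t used = 0;

    /* Mirrors the while (dma_len != 0) loop: peel off at most max_seg bytes. */
    while (len != 0) {
        uint64_t chunk = (len < max_seg) ? len : max_seg;
        len -= chunk;
        used++;
    }
    return used;
}

int main(void)
{
    const uint64_t max_seg = 65536;                   /* example max segment size */
    const uint64_t lens[]  = { 4096, 65536, 65537, 1 << 20 };

    for (size_t i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
        assert(split_segments(lens[i], max_seg) == count_segments(lens[i], max_seg));
        printf("len=%llu -> %llu segment(s)\n",
               (unsigned long long)lens[i],
               (unsigned long long)count_segments(lens[i], max_seg));
    }
    return 0;
}

The assert holds because peeling off min(len, max_seg) bytes per iteration takes exactly ceil(len / max_seg) iterations, which is why the scatterlist never runs out of entries mid-split.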
@@ -693,36 +735,16 @@ nv_dma_buf_map_pfns (
     struct sg_table *sgt = NULL;
     struct scatterlist *sg;
     nv_dma_device_t peer_dma_dev = {{ 0 }};
-    NvU32 dma_max_seg_size;
-    NvU32 nents = 0;
+    NvU32 dma_max_seg_size = 0;
     NvU32 mapped_nents = 0;
     NvU32 i = 0;
+    NvU32 nents;
     int rc = 0;
 
     peer_dma_dev.dev = dev;
     peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask;
 
-    dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
-
-    if (dma_max_seg_size < PAGE_SIZE)
-    {
-        return NULL;
-    }
-
-    // Calculate nents needed to allocate sg_table
-    for (i = 0; i < priv->num_objects; i++)
-    {
-        NvU32 range_count = priv->handles[i].memArea.numRanges;
-        NvU32 index;
-
-        for (index = 0; index < range_count; index++)
-        {
-            NvU64 length = priv->handles[i].memArea.pRanges[index].size;
-            NvU64 count = length + dma_max_seg_size - 1;
-            do_div(count, dma_max_seg_size);
-            nents += count;
-        }
-    }
+    nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
 
     NV_KZALLOC(sgt, sizeof(struct sg_table));
     if (sgt == NULL)
@@ -777,6 +799,9 @@ nv_dma_buf_map_pfns (
             }
         }
     }
+
+    WARN_ON(sg != NULL);
+
     sgt->nents = mapped_nents;
 
     WARN_ON(sgt->nents != sgt->orig_nents);
@@ -445,7 +445,9 @@ static int nvidia_mmap_sysmem(
         }
         else
         {
-            vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
+            if (at->flags.unencrypted)
+                vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
+
             ret = vm_insert_page(vma, start,
                     NV_GET_PAGE_STRUCT(at->page_table[j]->phys_addr));
         }
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -308,6 +308,15 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
     struct acpi_srat_generic_affinity *gi;
     NvU32 numa_node = NUMA_NO_NODE;
 
+    if (NV_PCI_DEVFN(nvl->pci_dev) != 0)
+    {
+        nv_printf(NV_DBG_ERRORS, "NVRM: Failing to parse SRAT GI for %04x:%02x:%02x.%x "
+                  "since non-zero device function is not supported.\n",
+                  NV_PCI_DOMAIN_NUMBER(nvl->pci_dev), NV_PCI_BUS_NUMBER(nvl->pci_dev),
+                  NV_PCI_SLOT_NUMBER(nvl->pci_dev), PCI_FUNC(nvl->pci_dev->devfn));
+        return 0;
+    }
+
     if (acpi_get_table(ACPI_SIG_SRAT, 0, &table_header)) {
         nv_printf(NV_DBG_INFO, "NVRM: Failed to parse the SRAT table.\n");
         return 0;
@@ -331,9 +340,14 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
            (((unsigned long)subtable_header) + subtable_header_length < table_end)) {
 
         if (subtable_header->type == ACPI_SRAT_TYPE_GENERIC_AFFINITY) {
+            NvU8 busAtByte2, busAtByte3;
             gi = (struct acpi_srat_generic_affinity *) subtable_header;
+            busAtByte2 = gi->device_handle[2];
+            busAtByte3 = gi->device_handle[3];
+
+            // Device and function should be zero enforced by above check
             gi_dbdf = *((NvU16 *)(&gi->device_handle[0])) << 16 |
-                      *((NvU16 *)(&gi->device_handle[2]));
+                      (busAtByte2 != 0 ? busAtByte2 : busAtByte3) << 8;
 
             if (gi_dbdf == dev_dbdf) {
                 numa_node = pxm_to_node(gi->proximity_domain);
@@ -347,6 +361,31 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
                     pxm_count = 0;
                     goto exit;
                 }
+                nv_printf(NV_DBG_INFO,
+                    "NVRM: matching SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n",
+                    gi->device_handle[3],
+                    gi->device_handle[2],
+                    gi->device_handle[1],
+                    gi->device_handle[0],
+                    gi->proximity_domain);
+                if ((busAtByte2) == 0 &&
+                    (busAtByte3) != 0)
+                {
+                    /*
+                     * TODO: Remove this WAR once Hypervisor stack is updated
+                     * to fix this bug and after all CSPs have moved to using
+                     * the updated Hypervisor stack with fix.
+                     */
+                    nv_printf(NV_DBG_WARNINGS,
+                        "NVRM: PCIe bus value picked from byte 3 offset in SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n"
+                        "NVRM: Hypervisor stack is old and not following ACPI spec defined offset.\n"
+                        "NVRM: Please consider upgrading the Hypervisor stack as this workaround will be removed in future release.\n",
+                        gi->device_handle[3],
+                        gi->device_handle[2],
+                        gi->device_handle[1],
+                        gi->device_handle[0],
+                        gi->proximity_domain);
+                }
             }
         }
 
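Aside: a standalone sketch of the DBDF assembly used in the SRAT GI matching above, assuming a little-endian host and the device-handle layout implied by the code (bytes 0-1 = PCI segment, byte 2 = bus per the ACPI spec, byte 3 = bus as emitted by the older hypervisor stacks the workaround targets; device/function are required to be zero). Plain C, not driver code; names are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t gi_handle_to_dbdf(const uint8_t device_handle[4])
{
    uint16_t segment;
    uint8_t  bus_at_byte2 = device_handle[2];
    uint8_t  bus_at_byte3 = device_handle[3];

    /* Host-endian 16-bit read, like the *((NvU16 *)&device_handle[0]) cast above. */
    memcpy(&segment, &device_handle[0], sizeof(segment));

    /* Prefer the spec-defined byte 2; fall back to byte 3 for the workaround. */
    return ((uint32_t)segment << 16) |
           ((uint32_t)(bus_at_byte2 != 0 ? bus_at_byte2 : bus_at_byte3) << 8);
}

int main(void)
{
    const uint8_t spec_handle[4]   = { 0x00, 0x00, 0x41, 0x00 }; /* bus in byte 2 */
    const uint8_t legacy_handle[4] = { 0x00, 0x00, 0x00, 0x41 }; /* bus in byte 3 */

    printf("spec:   0x%08x\n", gi_handle_to_dbdf(spec_handle));   /* 0x00004100 */
    printf("legacy: 0x%08x\n", gi_handle_to_dbdf(legacy_handle)); /* 0x00004100 */
    return 0;
}

Both handles resolve to the same DBDF, which is what lets the workaround accept either layout while still preferring the spec-defined byte 2.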
@@ -792,7 +831,10 @@ next_bar:
     NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_DISABLED);
     nvl->numa_info.node_id = NUMA_NO_NODE;
 
-    nv_init_coherent_link_info(nv);
+    if (pci_devid_is_self_hosted(pci_dev->device))
+    {
+        nv_init_coherent_link_info(nv);
+    }
 
 #if defined(NVCPU_PPC64LE)
     // Use HW NUMA support as a proxy for ATS support. This is true in the only
@@ -1630,17 +1630,25 @@ static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
                                        nv_linux_file_private_t *nvlfp,
                                        struct inode *inode)
 {
-    down(&nvl->mmap_lock);
-
     /* Set up struct address_space for use with unmap_mapping_range() */
     address_space_init_once(&nvlfp->mapping);
     nvlfp->mapping.host = inode;
     nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
     file->f_mapping = &nvlfp->mapping;
+}
 
-    /* Add nvlfp to list of open files in nvl for mapping revocation */
+/* Adds nvlfp to list of open files for mapping revocation */
+static void nv_add_open_file(nv_linux_state_t *nvl,
+                             nv_linux_file_private_t *nvlfp)
+{
+    nvlfp->nvptr = nvl;
+
+    /*
+     * nvl->open_files and other mapping revocation members in nv_linux_state_t
+     * are protected by nvl->mmap_lock instead of nvl->ldata_lock.
+     */
+    down(&nvl->mmap_lock);
     list_add(&nvlfp->entry, &nvl->open_files);
-
     up(&nvl->mmap_lock);
 }
 
@@ -1690,11 +1698,12 @@ static void nvidia_open_deferred(void *nvlfp_raw)
      */
     down(&nvl->ldata_lock);
     rc = nv_open_device_for_nvlfp(NV_STATE_PTR(nvl), nvlfp->sp, nvlfp);
+    up(&nvl->ldata_lock);
 
-    /* Set nvptr only upon success (where nvl->usage_count is incremented) */
+    /* Only add open file tracking where nvl->usage_count is incremented */
     if (rc == 0)
-        nvlfp->nvptr = nvl;
+        nv_add_open_file(nvl, nvlfp);
 
-    up(&nvl->ldata_lock);
+
     complete_all(&nvlfp->open_complete);
 }
@@ -1813,6 +1822,7 @@ nvidia_open(
     }
 
     nv = NV_STATE_PTR(nvl);
+    nv_init_mapping_revocation(nvl, file, nvlfp, inode);
 
     if (nv_try_lock_foreground_open(file, nvl) == 0)
     {
@@ -1823,11 +1833,11 @@ nvidia_open(
 
         rc = nv_open_device_for_nvlfp(nv, nvlfp->sp, nvlfp);
+
+        up(&nvl->ldata_lock);
 
-        /* Set nvptr only upon success (where nvl->usage_count is incremented) */
+        /* Only add open file tracking where nvl->usage_count is incremented */
         if (rc == 0)
-            nvlfp->nvptr = nvl;
+            nv_add_open_file(nvl, nvlfp);
 
-        up(&nvl->ldata_lock);
-
         complete_all(&nvlfp->open_complete);
     }
@@ -1882,10 +1892,6 @@ failed:
             NV_SET_FILE_PRIVATE(file, NULL);
         }
     }
-    else
-    {
-        nv_init_mapping_revocation(nvl, file, nvlfp, inode);
-    }
 
     return rc;
 }
@@ -1672,7 +1672,7 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
     NV_STATUS status = NV_ERR_NOT_SUPPORTED;
 
 #if defined(__GFP_THISNODE) && defined(GFP_HIGHUSER_MOVABLE) && \
-    defined(__GFP_COMP) && defined(__GFP_NORETRY) && defined(__GFP_NOWARN)
+    defined(__GFP_COMP) && defined(__GFP_NOWARN)
     gfp_t gfp_mask;
     struct page *alloc_addr;
     unsigned int order = get_order(size);
@@ -1689,13 +1689,11 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
      *       pages, which is needed in order to use
      *       vm_insert_page API.
      *
-     *    4. __GFP_NORETRY: Used to avoid the Linux kernel OOM killer.
-     *
-     *    5. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
+     *    4. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
      *       the requested order is too large (just fail
      *       instead).
      *
-     *    6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
+     *    5. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
      *       This is part of GFP_USER and consequently
      *       GFP_HIGHUSER_MOVABLE.
      *
@@ -1709,7 +1707,30 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
      */
 
     gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
-               __GFP_NORETRY | __GFP_NOWARN;
+               __GFP_NOWARN;
 
+#if defined(__GFP_RETRY_MAYFAIL)
+
+    /*
+     * __GFP_RETRY_MAYFAIL : Used to avoid the Linux kernel OOM killer.
+     *                       To help PMA on paths where UVM might be
+     *                       in memory over subscription. This gives UVM
+     *                       a chance to free memory before invoking any
+     *                       action from the OOM killer.
+     *                       Freeing non-essential memory will also benefit
+     *                       the system as a whole.
+     */
+
+    gfp_mask |= __GFP_RETRY_MAYFAIL;
+#elif defined(__GFP_NORETRY)
+
+    /*
+     * __GFP_NORETRY : Use __GFP_NORETRY on older kernels where
+     *                 __GFP_RETRY_MAYFAIL is not present.
+     */
+
+    gfp_mask |= __GFP_NORETRY;
+#endif
+
 #if defined(__GFP_RECLAIM)
     if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)
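For context on the (truncated) hunk above: the flag selection now prefers __GFP_RETRY_MAYFAIL when the kernel defines it and falls back to __GFP_NORETRY otherwise. Below is a userspace sketch of that preprocessor fallback pattern; the SK_* macros are stand-ins for the real <linux/gfp.h> flags so the #if/#elif chain can be compiled outside the kernel.

#include <stdio.h>

#define SK_THISNODE         0x01u  /* stand-in for __GFP_THISNODE */
#define SK_HIGHUSER_MOVABLE 0x02u  /* stand-in for GFP_HIGHUSER_MOVABLE */
#define SK_COMP             0x04u  /* stand-in for __GFP_COMP */
#define SK_NOWARN           0x08u  /* stand-in for __GFP_NOWARN */
#define SK_RETRY_MAYFAIL    0x10u  /* comment out to exercise the fallback */
#define SK_NORETRY          0x20u

int main(void)
{
    unsigned int gfp_mask = SK_THISNODE | SK_HIGHUSER_MOVABLE | SK_COMP | SK_NOWARN;

#if defined(SK_RETRY_MAYFAIL)
    /* Newer kernels: avoid the OOM killer while still allowing limited retries. */
    gfp_mask |= SK_RETRY_MAYFAIL;
#elif defined(SK_NORETRY)
    /* Older kernels without __GFP_RETRY_MAYFAIL: fail immediately instead. */
    gfp_mask |= SK_NORETRY;
#endif

    printf("gfp_mask = 0x%x\n", gfp_mask);
    return 0;
}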