570.124.04

Bernhard Stoeckner
2025-02-27 17:32:23 +01:00
parent 81fe4fb417
commit 129479b1b7
141 changed files with 102245 additions and 100070 deletions

View File

@@ -612,6 +612,42 @@ nv_dma_buf_unmap_pfns(
}
}
static NvU32
nv_dma_buf_get_sg_count (
struct device *dev,
nv_dma_buf_file_private_t *priv,
NvU32 *max_seg_size
)
{
NvU32 dma_max_seg_size, i;
NvU32 nents = 0;
dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE)
{
return 0;
}
// Calculate nents needed to allocate sg_table
for (i = 0; i < priv->num_objects; i++)
{
NvU32 range_count = priv->handles[i].memArea.numRanges;
NvU32 index;
for (index = 0; index < range_count; index++)
{
NvU64 length = priv->handles[i].memArea.pRanges[index].size;
NvU64 count = length + dma_max_seg_size - 1;
do_div(count, dma_max_seg_size);
nents += count;
}
}
*max_seg_size = dma_max_seg_size;
return nents;
}
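
The helper above is a ceiling division of each range size by the page-aligned maximum segment size; nv_dma_buf_map_pages then walks each range in chunks of at most that size. A small standalone C sketch of the same arithmetic follows (the sample ranges and the 64 KiB segment cap are assumptions for illustration, not driver values):

// Standalone illustration only; PAGE_SIZE, the segment cap, and the ranges are made up.
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))   /* power-of-two alignment, like NV_ALIGN_DOWN */

int main(void)
{
    uint64_t ranges[][2] = {            /* { start, size } */
        { 0x100000000ULL, 0x25000ULL },
        { 0x200000000ULL, 0x01000ULL },
    };
    uint64_t dma_max_seg_size = ALIGN_DOWN(0x10000ULL, PAGE_SIZE);  /* assumed 64 KiB cap */
    uint64_t nents = 0;

    for (size_t i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        uint64_t len = ranges[i][1];

        /* Same ceiling division the helper performs with do_div() */
        nents += (len + dma_max_seg_size - 1) / dma_max_seg_size;

        /* Chunk splitting, mirroring the while (dma_len != 0) loop in map_pages */
        uint64_t addr = ranges[i][0], remaining = len;
        while (remaining != 0) {
            uint64_t sg_len = remaining < dma_max_seg_size ? remaining : dma_max_seg_size;
            printf("sg entry: addr=0x%llx len=0x%llx\n",
                   (unsigned long long)addr, (unsigned long long)sg_len);
            addr += sg_len;
            remaining -= sg_len;
        }
    }
    printf("nents = %llu\n", (unsigned long long)nents);
    return 0;
}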
static struct sg_table*
nv_dma_buf_map_pages (
struct device *dev,
@@ -620,15 +656,11 @@ nv_dma_buf_map_pages (
{
struct sg_table *sgt = NULL;
struct scatterlist *sg;
- NvU32 nents = 0;
- NvU32 i;
+ NvU32 dma_max_seg_size = 0;
+ NvU32 i, nents;
int rc;
- // Calculate nents needed to allocate sg_table
- for (i = 0; i < priv->num_objects; i++)
- {
- nents += priv->handles[i].memArea.numRanges;
- }
+ nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
NV_KZALLOC(sgt, sizeof(struct sg_table));
if (sgt == NULL)
@@ -650,20 +682,30 @@ nv_dma_buf_map_pages (
NvU32 index = 0;
for (index = 0; index < range_count; index++)
{
- NvU64 addr = priv->handles[i].memArea.pRanges[index].start;
- NvU64 len = priv->handles[i].memArea.pRanges[index].size;
- struct page *page = NV_GET_PAGE_STRUCT(addr);
+ NvU64 dma_addr = priv->handles[i].memArea.pRanges[index].start;
+ NvU64 dma_len = priv->handles[i].memArea.pRanges[index].size;
- if ((page == NULL) || (sg == NULL))
+ // Split each range into dma_max_seg_size chunks
+ while(dma_len != 0)
{
- goto free_table;
- }
+ NvU32 sg_len = NV_MIN(dma_len, dma_max_seg_size);
+ struct page *page = NV_GET_PAGE_STRUCT(dma_addr);
- sg_set_page(sg, page, len, NV_GET_OFFSET_IN_PAGE(addr));
- sg = sg_next(sg);
+ if ((page == NULL) || (sg == NULL))
+ {
+ goto free_table;
+ }
+ sg_set_page(sg, page, sg_len, NV_GET_OFFSET_IN_PAGE(dma_addr));
+ dma_addr += sg_len;
+ dma_len -= sg_len;
+ sg = sg_next(sg);
+ }
}
}
+ WARN_ON(sg != NULL);
// DMA map the sg_table
rc = dma_map_sg(dev, sgt->sgl, sgt->orig_nents, DMA_BIDIRECTIONAL);
if (rc <= 0)
@@ -693,36 +735,16 @@ nv_dma_buf_map_pfns (
struct sg_table *sgt = NULL;
struct scatterlist *sg;
nv_dma_device_t peer_dma_dev = {{ 0 }};
- NvU32 dma_max_seg_size;
- NvU32 nents = 0;
+ NvU32 dma_max_seg_size = 0;
NvU32 mapped_nents = 0;
NvU32 i = 0;
+ NvU32 nents;
int rc = 0;
peer_dma_dev.dev = dev;
peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask;
- dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
- if (dma_max_seg_size < PAGE_SIZE)
- {
- return NULL;
- }
- // Calculate nents needed to allocate sg_table
- for (i = 0; i < priv->num_objects; i++)
- {
- NvU32 range_count = priv->handles[i].memArea.numRanges;
- NvU32 index;
- for (index = 0; index < range_count; index++)
- {
- NvU64 length = priv->handles[i].memArea.pRanges[index].size;
- NvU64 count = length + dma_max_seg_size - 1;
- do_div(count, dma_max_seg_size);
- nents += count;
- }
- }
+ nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
NV_KZALLOC(sgt, sizeof(struct sg_table));
if (sgt == NULL)
@@ -777,6 +799,9 @@ nv_dma_buf_map_pfns (
}
}
}
WARN_ON(sg != NULL);
sgt->nents = mapped_nents;
WARN_ON(sgt->nents != sgt->orig_nents);

View File

@@ -445,7 +445,9 @@ static int nvidia_mmap_sysmem(
}
else
{
- vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
+ if (at->flags.unencrypted)
+ vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
ret = vm_insert_page(vma, start,
NV_GET_PAGE_STRUCT(at->page_table[j]->phys_addr));
}

View File

@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -308,6 +308,15 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
struct acpi_srat_generic_affinity *gi;
NvU32 numa_node = NUMA_NO_NODE;
if (NV_PCI_DEVFN(nvl->pci_dev) != 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: Failing to parse SRAT GI for %04x:%02x:%02x.%x "
"since non-zero device function is not supported.\n",
NV_PCI_DOMAIN_NUMBER(nvl->pci_dev), NV_PCI_BUS_NUMBER(nvl->pci_dev),
NV_PCI_SLOT_NUMBER(nvl->pci_dev), PCI_FUNC(nvl->pci_dev->devfn));
return 0;
}
if (acpi_get_table(ACPI_SIG_SRAT, 0, &table_header)) {
nv_printf(NV_DBG_INFO, "NVRM: Failed to parse the SRAT table.\n");
return 0;
@@ -331,9 +340,14 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
(((unsigned long)subtable_header) + subtable_header_length < table_end)) {
if (subtable_header->type == ACPI_SRAT_TYPE_GENERIC_AFFINITY) {
NvU8 busAtByte2, busAtByte3;
gi = (struct acpi_srat_generic_affinity *) subtable_header;
busAtByte2 = gi->device_handle[2];
busAtByte3 = gi->device_handle[3];
// Device and function should be zero enforced by above check
gi_dbdf = *((NvU16 *)(&gi->device_handle[0])) << 16 |
- *((NvU16 *)(&gi->device_handle[2]));
+ (busAtByte2 != 0 ? busAtByte2 : busAtByte3) << 8;
if (gi_dbdf == dev_dbdf) {
numa_node = pxm_to_node(gi->proximity_domain);
@@ -347,6 +361,31 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
pxm_count = 0;
goto exit;
}
nv_printf(NV_DBG_INFO,
"NVRM: matching SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n",
gi->device_handle[3],
gi->device_handle[2],
gi->device_handle[1],
gi->device_handle[0],
gi->proximity_domain);
if ((busAtByte2) == 0 &&
(busAtByte3) != 0)
{
/*
* TODO: Remove this WAR once Hypervisor stack is updated
* to fix this bug and after all CSPs have moved to using
* the updated Hypervisor stack with fix.
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: PCIe bus value picked from byte 3 offset in SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n"
"NVRM: Hypervisor stack is old and not following ACPI spec defined offset.\n"
"NVRM: Please consider upgrading the Hypervisor stack as this workaround will be removed in future release.\n",
gi->device_handle[3],
gi->device_handle[2],
gi->device_handle[1],
gi->device_handle[0],
gi->proximity_domain);
}
}
}
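
For reference, the device-handle decoding above packs the PCI segment and bus into a DBDF value: the segment comes from bytes 0-1, the bus normally from byte 2, with byte 3 accepted as the workaround for older hypervisor stacks. A standalone sketch of that packing follows (the sample handle bytes are invented; the little-endian 16-bit read mirrors the driver's NvU16 cast on little-endian hosts):

// Illustration of the DBDF packing; sample device_handle contents are assumptions.
#include <stdio.h>
#include <stdint.h>

static uint32_t gi_handle_to_dbdf(const uint8_t handle[16])
{
    uint16_t segment  = (uint16_t)(handle[0] | (handle[1] << 8)); /* bytes 0-1: PCI segment */
    uint8_t  busAt2   = handle[2];                                /* ACPI-spec bus offset */
    uint8_t  busAt3   = handle[3];                                /* legacy hypervisor offset */
    uint8_t  bus      = (busAt2 != 0) ? busAt2 : busAt3;          /* WAR: fall back to byte 3 */

    /* Device and function are required to be zero by the earlier check,
     * so only segment and bus contribute: segment << 16 | bus << 8. */
    return ((uint32_t)segment << 16) | ((uint32_t)bus << 8);
}

int main(void)
{
    uint8_t handle[16] = { 0 };
    handle[3] = 0x21;   /* bus reported only at byte 3, as by an old hypervisor stack */
    printf("dbdf = 0x%08x\n", (unsigned)gi_handle_to_dbdf(handle));
    return 0;
}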
@@ -792,7 +831,10 @@ next_bar:
NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_DISABLED);
nvl->numa_info.node_id = NUMA_NO_NODE;
- nv_init_coherent_link_info(nv);
+ if (pci_devid_is_self_hosted(pci_dev->device))
+ {
+ nv_init_coherent_link_info(nv);
+ }
#if defined(NVCPU_PPC64LE)
// Use HW NUMA support as a proxy for ATS support. This is true in the only

View File

@@ -1630,17 +1630,25 @@ static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
nv_linux_file_private_t *nvlfp,
struct inode *inode)
{
- down(&nvl->mmap_lock);
/* Set up struct address_space for use with unmap_mapping_range() */
address_space_init_once(&nvlfp->mapping);
nvlfp->mapping.host = inode;
nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
file->f_mapping = &nvlfp->mapping;
}
- /* Add nvlfp to list of open files in nvl for mapping revocation */
+ /* Adds nvlfp to list of open files for mapping revocation */
static void nv_add_open_file(nv_linux_state_t *nvl,
nv_linux_file_private_t *nvlfp)
{
nvlfp->nvptr = nvl;
/*
* nvl->open_files and other mapping revocation members in nv_linux_state_t
* are protected by nvl->mmap_lock instead of nvl->ldata_lock.
*/
down(&nvl->mmap_lock);
list_add(&nvlfp->entry, &nvl->open_files);
up(&nvl->mmap_lock);
}
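
The comment above states the invariant behind this refactor: the open-files list is serialized by mmap_lock rather than ldata_lock. A generic userspace sketch of the same add-under-lock pattern follows, with a pthread mutex standing in for the kernel semaphore (all names here are illustrative, not driver code):

// Generic add-under-lock pattern; not driver code.
#include <pthread.h>
#include <stdio.h>

struct open_file {
    struct open_file *next;
    int id;
};

static struct open_file *open_files;                           /* head of the open-files list */
static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;  /* stands in for nvl->mmap_lock */

static void add_open_file(struct open_file *f)
{
    pthread_mutex_lock(&mmap_lock);    /* the list is touched only under this lock */
    f->next = open_files;
    open_files = f;
    pthread_mutex_unlock(&mmap_lock);
}

int main(void)
{
    struct open_file f = { .next = NULL, .id = 1 };
    add_open_file(&f);
    printf("head id = %d\n", open_files->id);
    return 0;
}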
@@ -1690,11 +1698,12 @@ static void nvidia_open_deferred(void *nvlfp_raw)
*/
down(&nvl->ldata_lock);
rc = nv_open_device_for_nvlfp(NV_STATE_PTR(nvl), nvlfp->sp, nvlfp);
- up(&nvl->ldata_lock);
- /* Set nvptr only upon success (where nvl->usage_count is incremented) */
+ /* Only add open file tracking where nvl->usage_count is incremented */
if (rc == 0)
- nvlfp->nvptr = nvl;
+ nv_add_open_file(nvl, nvlfp);
+ up(&nvl->ldata_lock);
complete_all(&nvlfp->open_complete);
}
@@ -1813,6 +1822,7 @@ nvidia_open(
}
nv = NV_STATE_PTR(nvl);
+ nv_init_mapping_revocation(nvl, file, nvlfp, inode);
if (nv_try_lock_foreground_open(file, nvl) == 0)
{
@@ -1823,11 +1833,11 @@ nvidia_open(
rc = nv_open_device_for_nvlfp(nv, nvlfp->sp, nvlfp);
- up(&nvl->ldata_lock);
- /* Set nvptr only upon success (where nvl->usage_count is incremented) */
+ /* Only add open file tracking where nvl->usage_count is incremented */
if (rc == 0)
- nvlfp->nvptr = nvl;
+ nv_add_open_file(nvl, nvlfp);
+ up(&nvl->ldata_lock);
complete_all(&nvlfp->open_complete);
}
@@ -1882,10 +1892,6 @@ failed:
NV_SET_FILE_PRIVATE(file, NULL);
}
}
- else
- {
- nv_init_mapping_revocation(nvl, file, nvlfp, inode);
- }
return rc;
}

View File

@@ -1672,7 +1672,7 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
NV_STATUS status = NV_ERR_NOT_SUPPORTED;
#if defined(__GFP_THISNODE) && defined(GFP_HIGHUSER_MOVABLE) && \
- defined(__GFP_COMP) && defined(__GFP_NORETRY) && defined(__GFP_NOWARN)
+ defined(__GFP_COMP) && defined(__GFP_NOWARN)
gfp_t gfp_mask;
struct page *alloc_addr;
unsigned int order = get_order(size);
@@ -1689,13 +1689,11 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
* pages, which is needed in order to use
* vm_insert_page API.
*
- * 4. __GFP_NORETRY: Used to avoid the Linux kernel OOM killer.
- *
- * 5. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
+ * 4. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
* the requested order is too large (just fail
* instead).
*
- * 6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
+ * 5. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
* This is part of GFP_USER and consequently
* GFP_HIGHUSER_MOVABLE.
*
@@ -1709,7 +1707,30 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
*/
gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
- __GFP_NORETRY | __GFP_NOWARN;
+ __GFP_NOWARN;
#if defined(__GFP_RETRY_MAYFAIL)
/*
* __GFP_RETRY_MAYFAIL : Used to avoid the Linux kernel OOM killer.
* To help PMA on paths where UVM might be
* in memory over subscription. This gives UVM
* a chance to free memory before invoking any
* action from the OOM killer.
* Freeing non-essential memory will also benefit
* the system as a whole.
*/
gfp_mask |= __GFP_RETRY_MAYFAIL;
#elif defined(__GFP_NORETRY)
/*
* __GFP_NORETRY : Use __GFP_NORETRY on older kernels where
* __GFP_RETRY_MAYFAIL is not present.
*/
gfp_mask |= __GFP_NORETRY;
#endif
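
The preprocessor block above prefers __GFP_RETRY_MAYFAIL where the kernel defines it and only falls back to __GFP_NORETRY on older kernels. A compile-time sketch of that selection follows, with stand-in flag values (the bit values are invented for illustration; real GFP bits come from the kernel headers):

// Stand-in flag values for illustration only.
#include <stdio.h>

#define GFP_HIGHUSER_MOVABLE 0x001u
#define __GFP_THISNODE       0x002u
#define __GFP_COMP           0x004u
#define __GFP_NOWARN         0x008u
#define __GFP_RETRY_MAYFAIL  0x010u   /* comment this line out to exercise the fallback */
#define __GFP_NORETRY        0x020u

int main(void)
{
    unsigned int gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
                            __GFP_NOWARN;

#if defined(__GFP_RETRY_MAYFAIL)
    /* Preferred: allows retries without invoking the OOM killer. */
    gfp_mask |= __GFP_RETRY_MAYFAIL;
#elif defined(__GFP_NORETRY)
    /* Older kernels: fail fast instead. */
    gfp_mask |= __GFP_NORETRY;
#endif

    printf("gfp_mask = 0x%03x\n", gfp_mask);
    return 0;
}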
#if defined(__GFP_RECLAIM)
if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)